path: root/mm/ksm.c
author     Linus Torvalds <torvalds@linux-foundation.org>  2024-11-23 09:58:07 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2024-11-23 09:58:07 -0800
commit     5c00ff742bf5caf85f60e1c73999f99376fb865d (patch)
tree       fa484e83c27af79f1c0511e7e0673507461c9379 /mm/ksm.c
parent     228a1157fb9fec47eb135b51c0202b574e079ebf (diff)
parent     2532e6c74a67e65b95f310946e0c0e0a41b3a34b (diff)
Merge tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:

 - The series "zram: optimal post-processing target selection" from Sergey Senozhatsky improves zram's post-processing selection algorithm. This leads to improved memory savings.

 - Wei Yang has gone to town on the maple tree code, contributing several series which clean up the implementation:
	- "refine mas_mab_cp()"
	- "Reduce the space to be cleared for maple_big_node"
	- "maple_tree: simplify mas_push_node()"
	- "Following cleanup after introduce mas_wr_store_type()"
	- "refine storing null"

 - The series "selftests/mm: hugetlb_fault_after_madv improvements" from David Hildenbrand fixes this selftest for s390.

 - The series "introduce pte_offset_map_{ro|rw}_nolock()" from Qi Zheng implements some rationalizations and cleanups in the page mapping code.

 - The series "mm: optimize shadow entries removal" from Shakeel Butt optimizes the file truncation code by speeding up the handling of shadow entries.

 - The series "Remove PageKsm()" from Matthew Wilcox completes the migration of this flag over to being a folio-based flag.

 - The series "Unify hugetlb into arch_get_unmapped_area functions" from Oscar Salvador implements a bunch of consolidations and cleanups in the hugetlb code.

 - The series "Do not shatter hugezeropage on wp-fault" from Dev Jain takes away the wp-fault time practice of turning a huge zero page into small pages. Instead we replace the whole thing with a THP. This is more consistent, cleaner, and potentially saves a large number of pagefaults.

 - The series "percpu: Add a test case and fix for clang" from Andy Shevchenko enhances and fixes the kernel's built-in percpu test code.

 - The series "mm/mremap: Remove extra vma tree walk" from Liam Howlett optimizes mremap() by avoiding work that we didn't need to do.

 - The series "Improve the tmpfs large folio read performance" from Baolin Wang teaches tmpfs to copy data into userspace at the folio size rather than as individual pages. A 20% speedup was observed.

 - The series "mm/damon/vaddr: Fix issue in damon_va_evenly_split_region()" from Zheng Yejian fixes DAMON splitting.

 - The series "memcg-v1: fully deprecate charge moving" from Shakeel Butt removes the long-deprecated memcg v1 charge moving feature.

 - The series "fix error handling in mmap_region() and refactor" from Lorenzo Stoakes cleans up some of the mmap() error handling and addresses some potential performance issues.

 - The series "x86/module: use large ROX pages for text allocations" from Mike Rapoport teaches x86 to use large pages for read-only-execute module text.

 - The series "page allocation tag compression" from Suren Baghdasaryan is follow-on maintenance work for the new page allocation profiling feature.

 - The series "page->index removals in mm" from Matthew Wilcox removes most references to page->index in mm/. A slow march towards shrinking struct page.

 - The series "damon/{self,kunit}tests: minor fixups for DAMON debugfs interface tests" from Andrew Paniakin performs maintenance work for DAMON's self-testing code.

 - The series "mm: zswap swap-out of large folios" from Kanchana Sridhar improves zswap's batching of compression and decompression. It is a step along the way towards using Intel IAA hardware acceleration for this zswap operation.

 - The series "kasan: migrate the last module test to kunit" from Sabyrzhan Tasbolatov completes the migration of the KASAN built-in tests over to the KUnit framework.

 - The series "implement lightweight guard pages" from Lorenzo Stoakes permits userspace to place fault-generating guard pages within a single VMA, rather than requiring that multiple VMAs be created for this. Improved efficiencies for userspace memory allocators are expected. (A usage sketch follows the commit list below.)

 - The series "memcg: tracepoint for flushing stats" from JP Kobryn uses tracepoints to provide increased visibility into memcg stats flushing activity.

 - The series "zram: IDLE flag handling fixes" from Sergey Senozhatsky fixes a zram buglet which potentially affected performance.

 - The series "mm: add more kernel parameters to control mTHP" from Maíra Canal enhances our ability to control/configure multisize THP from the kernel boot command line.

 - The series "kasan: few improvements on kunit tests" from Sabyrzhan Tasbolatov has a couple of fixups for the KASAN KUnit tests.

 - The series "mm/list_lru: Split list_lru lock into per-cgroup scope" from Kairui Song optimizes list_lru memory utilization when lockdep is enabled.

* tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (215 commits)
  cma: enforce non-zero pageblock_order during cma_init_reserved_mem()
  mm/kfence: add a new kunit test test_use_after_free_read_nofault()
  zram: fix NULL pointer in comp_algorithm_show()
  memcg/hugetlb: add hugeTLB counters to memcg
  vmstat: call fold_vm_zone_numa_events() before show per zone NUMA event
  mm: mmap_lock: check trace_mmap_lock_$type_enabled() instead of regcount
  zram: ZRAM_DEF_COMP should depend on ZRAM
  MAINTAINERS/MEMORY MANAGEMENT: add document files for mm
  Docs/mm/damon: recommend academic papers to read and/or cite
  mm: define general function pXd_init()
  kmemleak: iommu/iova: fix transient kmemleak false positive
  mm/list_lru: simplify the list_lru walk callback function
  mm/list_lru: split the lock to per-cgroup scope
  mm/list_lru: simplify reparenting and initial allocation
  mm/list_lru: code clean up for reparenting
  mm/list_lru: don't export list_lru_add
  mm/list_lru: don't pass unnecessary key parameters
  kasan: add kunit tests for kmalloc_track_caller, kmalloc_node_track_caller
  kasan: change kasan_atomics kunit test as KUNIT_CASE_SLOW
  kasan: use EXPORT_SYMBOL_IF_KUNIT to export symbols
  ...
Diffstat (limited to 'mm/ksm.c')
-rw-r--r--   mm/ksm.c   110
1 file changed, 55 insertions, 55 deletions
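The mm/ksm.c diff below is largely a mechanical page-to-folio conversion (part of the "Remove PageKsm()" work): per-page helpers are replaced with their folio equivalents after resolving the folio once. A schematic sketch of that pattern follows; the function name is made up for illustration and this is not code from the patch.

#include <linux/mm.h>
#include <linux/pagemap.h>

/* Illustrative only: old page-based calls shown next to their folio replacements. */
static void __maybe_unused folio_conversion_sketch(struct page *page)
{
	struct folio *folio = page_folio(page);	/* resolve the owning folio once */

	if (!folio_test_anon(folio))		/* was: !PageAnon(page) */
		return;
	if (!folio_trylock(folio))		/* was: !trylock_page(page) */
		return;
	if (!folio_test_dirty(folio))		/* was: !PageDirty(page) */
		folio_mark_dirty(folio);	/* was: SetPageDirty(page) */
	folio_unlock(folio);			/* was: unlock_page(page) */
}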
diff --git a/mm/ksm.c b/mm/ksm.c
index a2e2a521df0a..31a9bc365437 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -20,7 +20,6 @@
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
-#include <linux/sched/coredump.h>
#include <linux/sched/cputime.h>
#include <linux/rwsem.h>
#include <linux/pagemap.h>
@@ -657,7 +656,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_v
*
* VM_FAULT_SIGBUS could occur if we race with truncation of the
* backing file, which also invalidates anonymous pages: that's
- * okay, that truncation will have unmapped the PageKsm for us.
+ * okay, that truncation will have unmapped the KSM page for us.
*
* VM_FAULT_OOM: at the time of writing (late July 2009), setting
* aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
@@ -1052,7 +1051,8 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
return err;
}
-static inline struct ksm_stable_node *folio_stable_node(struct folio *folio)
+static inline
+struct ksm_stable_node *folio_stable_node(const struct folio *folio)
{
return folio_test_ksm(folio) ? folio_raw_mapping(folio) : NULL;
}
@@ -1257,7 +1257,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct folio *folio,
if (WARN_ON_ONCE(folio_test_large(folio)))
return err;
- pvmw.address = page_address_in_vma(&folio->page, vma);
+ pvmw.address = page_address_in_vma(folio, folio_page(folio, 0), vma);
if (pvmw.address == -EFAULT)
goto out;
@@ -1341,7 +1341,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
{
struct folio *kfolio = page_folio(kpage);
struct mm_struct *mm = vma->vm_mm;
- struct folio *folio;
+ struct folio *folio = page_folio(page);
pmd_t *pmd;
pmd_t pmde;
pte_t *ptep;
@@ -1351,7 +1351,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
int err = -EFAULT;
struct mmu_notifier_range range;
- addr = page_address_in_vma(page, vma);
+ addr = page_address_in_vma(folio, page, vma);
if (addr == -EFAULT)
goto out;
@@ -1417,7 +1417,6 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
ptep_clear_flush(vma, addr, ptep);
set_pte_at(mm, addr, ptep, newpte);
- folio = page_folio(page);
folio_remove_rmap_pte(folio, page, vma);
if (!folio_mapped(folio))
folio_free_swap(folio);
@@ -1435,7 +1434,7 @@ out:
* try_to_merge_one_page - take two pages and merge them into one
* @vma: the vma that holds the pte pointing to page
* @page: the PageAnon page that we want to replace with kpage
- * @kpage: the PageKsm page that we want to map instead of page,
+ * @kpage: the KSM page that we want to map instead of page,
* or NULL the first time when we want to use page as kpage.
*
* This function returns 0 if the pages were merged, -EFAULT otherwise.
@@ -1443,28 +1442,29 @@ out:
static int try_to_merge_one_page(struct vm_area_struct *vma,
struct page *page, struct page *kpage)
{
+ struct folio *folio = page_folio(page);
pte_t orig_pte = __pte(0);
int err = -EFAULT;
if (page == kpage) /* ksm page forked */
return 0;
- if (!PageAnon(page))
+ if (!folio_test_anon(folio))
goto out;
/*
* We need the folio lock to read a stable swapcache flag in
- * write_protect_page(). We use trylock_page() instead of
- * lock_page() because we don't want to wait here - we
- * prefer to continue scanning and merging different pages,
- * then come back to this page when it is unlocked.
+ * write_protect_page(). We trylock because we don't want to wait
+ * here - we prefer to continue scanning and merging different
+ * pages, then come back to this page when it is unlocked.
*/
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto out;
- if (PageTransCompound(page)) {
+ if (folio_test_large(folio)) {
if (split_huge_page(page))
goto out_unlock;
+ folio = page_folio(page);
}
/*
@@ -1473,28 +1473,28 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
* ptes are necessarily already write-protected. But in either
* case, we need to lock and check page_count is not raised.
*/
- if (write_protect_page(vma, page_folio(page), &orig_pte) == 0) {
+ if (write_protect_page(vma, folio, &orig_pte) == 0) {
if (!kpage) {
/*
- * While we hold page lock, upgrade page from
- * PageAnon+anon_vma to PageKsm+NULL stable_node:
+ * While we hold folio lock, upgrade folio from
+ * anon to a NULL stable_node with the KSM flag set:
* stable_tree_insert() will update stable_node.
*/
- folio_set_stable_node(page_folio(page), NULL);
- mark_page_accessed(page);
+ folio_set_stable_node(folio, NULL);
+ folio_mark_accessed(folio);
/*
- * Page reclaim just frees a clean page with no dirty
+ * Page reclaim just frees a clean folio with no dirty
* ptes: make sure that the ksm page would be swapped.
*/
- if (!PageDirty(page))
- SetPageDirty(page);
+ if (!folio_test_dirty(folio))
+ folio_mark_dirty(folio);
err = 0;
} else if (pages_identical(page, kpage))
err = replace_page(vma, page, kpage, orig_pte);
}
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
out:
return err;
}
@@ -1582,7 +1582,7 @@ out:
* Note that this function upgrades page to ksm page: if one of the pages
* is already a ksm page, try_to_merge_with_ksm_page should be used.
*/
-static struct page *try_to_merge_two_pages(struct ksm_rmap_item *rmap_item,
+static struct folio *try_to_merge_two_pages(struct ksm_rmap_item *rmap_item,
struct page *page,
struct ksm_rmap_item *tree_rmap_item,
struct page *tree_page)
@@ -1600,7 +1600,7 @@ static struct page *try_to_merge_two_pages(struct ksm_rmap_item *rmap_item,
if (err)
break_cow(rmap_item);
}
- return err ? NULL : page;
+ return err ? NULL : page_folio(page);
}
static __always_inline
@@ -1787,9 +1787,9 @@ static __always_inline struct folio *chain(struct ksm_stable_node **s_n_d,
* with identical content to the page that we are scanning right now.
*
* This function returns the stable tree node of identical content if found,
- * NULL otherwise.
+ * -EBUSY if the stable node's page is being migrated, NULL otherwise.
*/
-static struct page *stable_tree_search(struct page *page)
+static struct folio *stable_tree_search(struct page *page)
{
int nid;
struct rb_root *root;
@@ -1804,7 +1804,7 @@ static struct page *stable_tree_search(struct page *page)
if (page_node && page_node->head != &migrate_nodes) {
/* ksm page forked */
folio_get(folio);
- return &folio->page;
+ return folio;
}
nid = get_kpfn_nid(folio_pfn(folio));
@@ -1899,7 +1899,7 @@ again:
folio_put(tree_folio);
goto replace;
}
- return &tree_folio->page;
+ return tree_folio;
}
}
@@ -1913,7 +1913,7 @@ again:
out:
if (is_page_sharing_candidate(page_node)) {
folio_get(folio);
- return &folio->page;
+ return folio;
} else
return NULL;
@@ -1963,7 +1963,7 @@ replace:
}
stable_node_dup->head = &migrate_nodes;
list_add(&stable_node_dup->list, stable_node_dup->head);
- return &folio->page;
+ return folio;
chain_append:
/*
@@ -2217,7 +2217,7 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
struct ksm_rmap_item *tree_rmap_item;
struct page *tree_page = NULL;
struct ksm_stable_node *stable_node;
- struct page *kpage;
+ struct folio *kfolio;
unsigned int checksum;
int err;
bool max_page_sharing_bypass = false;
@@ -2259,31 +2259,31 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
return;
}
- /* We first start with searching the page inside the stable tree */
- kpage = stable_tree_search(page);
- if (kpage == page && rmap_item->head == stable_node) {
- put_page(kpage);
+ /* Start by searching for the folio in the stable tree */
+ kfolio = stable_tree_search(page);
+ if (&kfolio->page == page && rmap_item->head == stable_node) {
+ folio_put(kfolio);
return;
}
remove_rmap_item_from_tree(rmap_item);
- if (kpage) {
- if (PTR_ERR(kpage) == -EBUSY)
+ if (kfolio) {
+ if (kfolio == ERR_PTR(-EBUSY))
return;
- err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
+ err = try_to_merge_with_ksm_page(rmap_item, page, &kfolio->page);
if (!err) {
/*
* The page was successfully merged:
* add its rmap_item to the stable tree.
*/
- lock_page(kpage);
- stable_tree_append(rmap_item, page_stable_node(kpage),
+ folio_lock(kfolio);
+ stable_tree_append(rmap_item, folio_stable_node(kfolio),
max_page_sharing_bypass);
- unlock_page(kpage);
+ folio_unlock(kfolio);
}
- put_page(kpage);
+ folio_put(kfolio);
return;
}
@@ -2292,7 +2292,7 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
if (tree_rmap_item) {
bool split;
- kpage = try_to_merge_two_pages(rmap_item, page,
+ kfolio = try_to_merge_two_pages(rmap_item, page,
tree_rmap_item, tree_page);
/*
* If both pages we tried to merge belong to the same compound
@@ -2307,20 +2307,20 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
split = PageTransCompound(page)
&& compound_head(page) == compound_head(tree_page);
put_page(tree_page);
- if (kpage) {
+ if (kfolio) {
/*
* The pages were successfully merged: insert new
* node in the stable tree and add both rmap_items.
*/
- lock_page(kpage);
- stable_node = stable_tree_insert(page_folio(kpage));
+ folio_lock(kfolio);
+ stable_node = stable_tree_insert(kfolio);
if (stable_node) {
stable_tree_append(tree_rmap_item, stable_node,
false);
stable_tree_append(rmap_item, stable_node,
false);
}
- unlock_page(kpage);
+ folio_unlock(kfolio);
/*
* If we fail to insert the page into the stable tree,
@@ -2401,10 +2401,10 @@ static unsigned int skip_age(rmap_age_t age)
/*
* Determines if a page should be skipped for the current scan.
*
- * @page: page to check
+ * @folio: folio containing the page to check
* @rmap_item: associated rmap_item of page
*/
-static bool should_skip_rmap_item(struct page *page,
+static bool should_skip_rmap_item(struct folio *folio,
struct ksm_rmap_item *rmap_item)
{
rmap_age_t age;
@@ -2417,7 +2417,7 @@ static bool should_skip_rmap_item(struct page *page,
* will essentially ignore them, but we still have to process them
* properly.
*/
- if (PageKsm(page))
+ if (folio_test_ksm(folio))
return false;
age = rmap_item->age;
@@ -2560,7 +2560,7 @@ next_mm:
ksm_scan.rmap_list =
&rmap_item->rmap_list;
- if (should_skip_rmap_item(tmp_page, rmap_item)) {
+ if (should_skip_rmap_item(folio, rmap_item)) {
folio_put(folio);
goto next_page;
}
@@ -2970,7 +2970,7 @@ struct folio *ksm_might_need_to_copy(struct folio *folio,
if (!folio_test_uptodate(folio))
return folio; /* let do_swap_page report the error */
- new_folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, addr, false);
+ new_folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, addr);
if (new_folio &&
mem_cgroup_charge(new_folio, vma->vm_mm, GFP_KERNEL)) {
folio_put(new_folio);
@@ -3067,7 +3067,7 @@ again:
/*
* Collect processes when the error hit an ksm page.
*/
-void collect_procs_ksm(struct folio *folio, struct page *page,
+void collect_procs_ksm(const struct folio *folio, const struct page *page,
struct list_head *to_kill, int force_early)
{
struct ksm_stable_node *stable_node;