summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-25 11:44:43 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-25 11:44:43 -0700
commit6f0edbb833ec16ab2042073af4846152b455104d (patch)
tree5295baa2ada00c7b5eb45c79b2a0d82f130b4f50 /mm
parent4942fed84b98cfb71d3cdff1a3df0072a57bbdfa (diff)
parente5548f85b4527c4c803b7eae7887c10bf8f90c97 (diff)
downloadlwn-6f0edbb833ec16ab2042073af4846152b455104d.tar.gz
lwn-6f0edbb833ec16ab2042073af4846152b455104d.zip
Merge tag 'mm-hotfixes-stable-2023-08-25-11-07' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton: "18 hotfixes. 13 are cc:stable and the remainder pertain to post-6.4 issues or aren't considered suitable for a -stable backport" * tag 'mm-hotfixes-stable-2023-08-25-11-07' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: shmem: fix smaps BUG sleeping while atomic selftests: cachestat: catch failing fsync test on tmpfs selftests: cachestat: test for cachestat availability maple_tree: disable mas_wr_append() when other readers are possible madvise:madvise_free_pte_range(): don't use mapcount() against large folio for sharing check madvise:madvise_free_huge_pmd(): don't use mapcount() against large folio for sharing check madvise:madvise_cold_or_pageout_pte_range(): don't use mapcount() against large folio for sharing check mm: multi-gen LRU: don't spin during memcg release mm: memory-failure: fix unexpected return value in soft_offline_page() radix tree: remove unused variable mm: add a call to flush_cache_vmap() in vmap_pfn() selftests/mm: FOLL_LONGTERM need to be updated to 0x100 nilfs2: fix general protection fault in nilfs_lookup_dirty_data_buffers() mm/gup: handle cont-PTE hugetlb pages correctly in gup_must_unshare() via GUP-fast selftests: cgroup: fix test_kmem_basic less than error mm: enable page walking API to lock vmas during the walk smaps: use vm_normal_page_pmd() instead of follow_trans_huge_pmd() mm/gup: reintroduce FOLL_NUMA as FOLL_HONOR_NUMA_FAULT
Diffstat (limited to 'mm')
-rw-r--r--mm/damon/vaddr.c2
-rw-r--r--mm/gup.c30
-rw-r--r--mm/hmm.c1
-rw-r--r--mm/huge_memory.c5
-rw-r--r--mm/internal.h17
-rw-r--r--mm/ksm.c25
-rw-r--r--mm/madvise.c9
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/memory-failure.c12
-rw-r--r--mm/mempolicy.c22
-rw-r--r--mm/migrate_device.c1
-rw-r--r--mm/mincore.c1
-rw-r--r--mm/mlock.c1
-rw-r--r--mm/mprotect.c1
-rw-r--r--mm/pagewalk.c36
-rw-r--r--mm/shmem.c6
-rw-r--r--mm/vmalloc.c4
-rw-r--r--mm/vmscan.c14
18 files changed, 146 insertions, 43 deletions
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 2fcc9731528a..e0e59d420fca 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -386,6 +386,7 @@ out:
static const struct mm_walk_ops damon_mkold_ops = {
.pmd_entry = damon_mkold_pmd_entry,
.hugetlb_entry = damon_mkold_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
};
static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
@@ -525,6 +526,7 @@ out:
static const struct mm_walk_ops damon_young_ops = {
.pmd_entry = damon_young_pmd_entry,
.hugetlb_entry = damon_young_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
};
static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
diff --git a/mm/gup.c b/mm/gup.c
index 76d222ccc3ff..6e2f9e9d6537 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -597,7 +597,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
pte = ptep_get(ptep);
if (!pte_present(pte))
goto no_page;
- if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+ if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
goto no_page;
page = vm_normal_page(vma, address, pte);
@@ -714,7 +714,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
if (likely(!pmd_trans_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
- if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
+ if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
return no_page_table(vma, flags);
ptl = pmd_lock(mm, pmd);
@@ -851,6 +851,10 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
return NULL;
+ /*
+ * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect
+ * to fail on PROT_NONE-mapped pages.
+ */
page = follow_page_mask(vma, address, foll_flags, &ctx);
if (ctx.pgmap)
put_dev_pagemap(ctx.pgmap);
@@ -2227,6 +2231,13 @@ static bool is_valid_gup_args(struct page **pages, int *locked,
gup_flags |= FOLL_UNLOCKABLE;
}
+ /*
+ * For now, always trigger NUMA hinting faults. Some GUP users like
+ * KVM require the hint to be as the calling context of GUP is
+ * functionally similar to a memory reference from task context.
+ */
+ gup_flags |= FOLL_HONOR_NUMA_FAULT;
+
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
@@ -2551,7 +2562,14 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
struct page *page;
struct folio *folio;
- if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+ /*
+ * Always fallback to ordinary GUP on PROT_NONE-mapped pages:
+ * pte_access_permitted() better should reject these pages
+ * either way: otherwise, GUP-fast might succeed in
+ * cases where ordinary GUP would fail due to VMA access
+ * permissions.
+ */
+ if (pte_protnone(pte))
goto pte_unmap;
if (!pte_access_permitted(pte, flags & FOLL_WRITE))
@@ -2970,8 +2988,8 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
pmd_devmap(pmd))) {
- if (pmd_protnone(pmd) &&
- !gup_can_follow_protnone(flags))
+ /* See gup_pte_range() */
+ if (pmd_protnone(pmd))
return 0;
if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
@@ -3151,7 +3169,7 @@ static int internal_get_user_pages_fast(unsigned long start,
if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
FOLL_FORCE | FOLL_PIN | FOLL_GET |
FOLL_FAST_ONLY | FOLL_NOFAULT |
- FOLL_PCI_P2PDMA)))
+ FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT)))
return -EINVAL;
if (gup_flags & FOLL_PIN)
diff --git a/mm/hmm.c b/mm/hmm.c
index 855e25e59d8f..277ddcab4947 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -562,6 +562,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
.pte_hole = hmm_vma_walk_hole,
.hugetlb_entry = hmm_vma_walk_hugetlb_entry,
.test_walk = hmm_vma_walk_test,
+ .walk_lock = PGWALK_RDLOCK,
};
/**
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eb3678360b97..164d22365bde 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1467,8 +1467,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
return ERR_PTR(-EFAULT);
- /* Full NUMA hinting faults to serialise migration in fault paths */
- if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))
+ if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))
return NULL;
if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))
@@ -1613,7 +1612,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
* If other processes are mapping this folio, we couldn't discard
* the folio unless they all do MADV_FREE so let's skip the folio.
*/
- if (folio_mapcount(folio) != 1)
+ if (folio_estimated_sharers(folio) != 1)
goto out;
if (!folio_trylock(folio))
diff --git a/mm/internal.h b/mm/internal.h
index a7d9e980429a..8ed127c1c808 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -924,6 +924,13 @@ int migrate_device_coherent_page(struct page *page);
struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
int __must_check try_grab_page(struct page *page, unsigned int flags);
+/*
+ * mm/huge_memory.c
+ */
+struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd,
+ unsigned int flags);
+
enum {
/* mark page accessed */
FOLL_TOUCH = 1 << 16,
@@ -998,6 +1005,16 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma,
smp_rmb();
/*
+ * During GUP-fast we might not get called on the head page for a
+ * hugetlb page that is mapped using cont-PTE, because GUP-fast does
+ * not work with the abstracted hugetlb PTEs that always point at the
+ * head page. For hugetlb, PageAnonExclusive only applies on the head
+ * page (as it cannot be partially COW-shared), so lookup the head page.
+ */
+ if (unlikely(!PageHead(page) && PageHuge(page)))
+ page = compound_head(page);
+
+ /*
* Note that PageKsm() pages cannot be exclusive, and consequently,
* cannot get pinned.
*/
diff --git a/mm/ksm.c b/mm/ksm.c
index d20d7662419b..d7b5b95e936e 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -455,6 +455,12 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
static const struct mm_walk_ops break_ksm_ops = {
.pmd_entry = break_ksm_pmd_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops break_ksm_lock_vma_ops = {
+ .pmd_entry = break_ksm_pmd_entry,
+ .walk_lock = PGWALK_WRLOCK,
};
/*
@@ -470,16 +476,17 @@ static const struct mm_walk_ops break_ksm_ops = {
* of the process that owns 'vma'. We also do not want to enforce
* protection keys here anyway.
*/
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
{
vm_fault_t ret = 0;
+ const struct mm_walk_ops *ops = lock_vma ?
+ &break_ksm_lock_vma_ops : &break_ksm_ops;
do {
int ksm_page;
cond_resched();
- ksm_page = walk_page_range_vma(vma, addr, addr + 1,
- &break_ksm_ops, NULL);
+ ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);
if (WARN_ON_ONCE(ksm_page < 0))
return ksm_page;
if (!ksm_page)
@@ -565,7 +572,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
mmap_read_lock(mm);
vma = find_mergeable_vma(mm, addr);
if (vma)
- break_ksm(vma, addr);
+ break_ksm(vma, addr, false);
mmap_read_unlock(mm);
}
@@ -871,7 +878,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
* in cmp_and_merge_page on one of the rmap_items we would be removing.
*/
static int unmerge_ksm_pages(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end, bool lock_vma)
{
unsigned long addr;
int err = 0;
@@ -882,7 +889,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
if (signal_pending(current))
err = -ERESTARTSYS;
else
- err = break_ksm(vma, addr);
+ err = break_ksm(vma, addr, lock_vma);
}
return err;
}
@@ -1029,7 +1036,7 @@ static int unmerge_and_remove_all_rmap_items(void)
if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
continue;
err = unmerge_ksm_pages(vma,
- vma->vm_start, vma->vm_end);
+ vma->vm_start, vma->vm_end, false);
if (err)
goto error;
}
@@ -2530,7 +2537,7 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
return 0;
if (vma->anon_vma) {
- err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+ err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);
if (err)
return err;
}
@@ -2668,7 +2675,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
return 0; /* just ignore the advice */
if (vma->anon_vma) {
- err = unmerge_ksm_pages(vma, start, end);
+ err = unmerge_ksm_pages(vma, start, end, true);
if (err)
return err;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index 886f06066622..ec30f48f8f2e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -233,6 +233,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
static const struct mm_walk_ops swapin_walk_ops = {
.pmd_entry = swapin_walk_pmd_entry,
+ .walk_lock = PGWALK_RDLOCK,
};
static void shmem_swapin_range(struct vm_area_struct *vma,
@@ -383,7 +384,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
folio = pfn_folio(pmd_pfn(orig_pmd));
/* Do not interfere with other mappings of this folio */
- if (folio_mapcount(folio) != 1)
+ if (folio_estimated_sharers(folio) != 1)
goto huge_unlock;
if (pageout_anon_only_filter && !folio_test_anon(folio))
@@ -457,7 +458,7 @@ regular_folio:
if (folio_test_large(folio)) {
int err;
- if (folio_mapcount(folio) != 1)
+ if (folio_estimated_sharers(folio) != 1)
break;
if (pageout_anon_only_filter && !folio_test_anon(folio))
break;
@@ -534,6 +535,7 @@ regular_folio:
static const struct mm_walk_ops cold_walk_ops = {
.pmd_entry = madvise_cold_or_pageout_pte_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static void madvise_cold_page_range(struct mmu_gather *tlb,
@@ -678,7 +680,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
if (folio_test_large(folio)) {
int err;
- if (folio_mapcount(folio) != 1)
+ if (folio_estimated_sharers(folio) != 1)
break;
if (!folio_trylock(folio))
break;
@@ -757,6 +759,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
static const struct mm_walk_ops madvise_free_walk_ops = {
.pmd_entry = madvise_free_pte_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static int madvise_free_single_vma(struct vm_area_struct *vma,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e8ca4bdcb03c..315fd5f45e3c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6024,6 +6024,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
static const struct mm_walk_ops precharge_walk_ops = {
.pmd_entry = mem_cgroup_count_precharge_pte_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
@@ -6303,6 +6304,7 @@ put: /* get_mctgt_type() gets & locks the page */
static const struct mm_walk_ops charge_walk_ops = {
.pmd_entry = mem_cgroup_move_charge_pte_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static void mem_cgroup_move_charge(void)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9a285038d765..fe121fdb05f7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -831,6 +831,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
static const struct mm_walk_ops hwp_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
@@ -2740,10 +2741,13 @@ retry:
if (ret > 0) {
ret = soft_offline_in_use_page(page);
} else if (ret == 0) {
- if (!page_handle_poison(page, true, false) && try_again) {
- try_again = false;
- flags &= ~MF_COUNT_INCREASED;
- goto retry;
+ if (!page_handle_poison(page, true, false)) {
+ if (try_again) {
+ try_again = false;
+ flags &= ~MF_COUNT_INCREASED;
+ goto retry;
+ }
+ ret = -EBUSY;
}
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c53f8beeb507..ec2eaceffd74 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -718,6 +718,14 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
.hugetlb_entry = queue_folios_hugetlb,
.pmd_entry = queue_folios_pte_range,
.test_walk = queue_pages_test_walk,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
+ .hugetlb_entry = queue_folios_hugetlb,
+ .pmd_entry = queue_folios_pte_range,
+ .test_walk = queue_pages_test_walk,
+ .walk_lock = PGWALK_WRLOCK,
};
/*
@@ -738,7 +746,7 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
static int
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
nodemask_t *nodes, unsigned long flags,
- struct list_head *pagelist)
+ struct list_head *pagelist, bool lock_vma)
{
int err;
struct queue_pages qp = {
@@ -749,8 +757,10 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
.end = end,
.first = NULL,
};
+ const struct mm_walk_ops *ops = lock_vma ?
+ &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
- err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+ err = walk_page_range(mm, start, end, ops, &qp);
if (!qp.first)
/* whole range in hole */
@@ -1078,7 +1088,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
vma = find_vma(mm, 0);
VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
- flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+ flags | MPOL_MF_DISCONTIG_OK, &pagelist, false);
if (!list_empty(&pagelist)) {
err = migrate_pages(&pagelist, alloc_migration_target, NULL,
@@ -1321,12 +1331,8 @@ static long do_mbind(unsigned long start, unsigned long len,
* Lock the VMAs before scanning for pages to migrate, to ensure we don't
* miss a concurrently inserted page.
*/
- vma_iter_init(&vmi, mm, start);
- for_each_vma_range(vmi, vma, end)
- vma_start_write(vma);
-
ret = queue_pages_range(mm, start, end, nmask,
- flags | MPOL_MF_INVERT, &pagelist);
+ flags | MPOL_MF_INVERT, &pagelist, true);
if (ret < 0) {
err = ret;
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 8365158460ed..d5f492356e3e 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -279,6 +279,7 @@ next:
static const struct mm_walk_ops migrate_vma_walk_ops = {
.pmd_entry = migrate_vma_collect_pmd,
.pte_hole = migrate_vma_collect_hole,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
diff --git a/mm/mincore.c b/mm/mincore.c
index b7f7a516b26c..dad3622cc963 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -176,6 +176,7 @@ static const struct mm_walk_ops mincore_walk_ops = {
.pmd_entry = mincore_pte_range,
.pte_hole = mincore_unmapped_range,
.hugetlb_entry = mincore_hugetlb,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
diff --git a/mm/mlock.c b/mm/mlock.c
index 0a0c996c5c21..479e09d0994c 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -371,6 +371,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
{
static const struct mm_walk_ops mlock_walk_ops = {
.pmd_entry = mlock_pte_range,
+ .walk_lock = PGWALK_WRLOCK_VERIFY,
};
/*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6f658d483704..3aef1340533a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -568,6 +568,7 @@ static const struct mm_walk_ops prot_none_walk_ops = {
.pte_entry = prot_none_pte_entry,
.hugetlb_entry = prot_none_hugetlb_entry,
.test_walk = prot_none_test,
+ .walk_lock = PGWALK_WRLOCK,
};
int
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 2022333805d3..9b2d23fbf4d3 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -400,6 +400,33 @@ static int __walk_page_range(unsigned long start, unsigned long end,
return err;
}
+static inline void process_mm_walk_lock(struct mm_struct *mm,
+ enum page_walk_lock walk_lock)
+{
+ if (walk_lock == PGWALK_RDLOCK)
+ mmap_assert_locked(mm);
+ else
+ mmap_assert_write_locked(mm);
+}
+
+static inline void process_vma_walk_lock(struct vm_area_struct *vma,
+ enum page_walk_lock walk_lock)
+{
+#ifdef CONFIG_PER_VMA_LOCK
+ switch (walk_lock) {
+ case PGWALK_WRLOCK:
+ vma_start_write(vma);
+ break;
+ case PGWALK_WRLOCK_VERIFY:
+ vma_assert_write_locked(vma);
+ break;
+ case PGWALK_RDLOCK:
+ /* PGWALK_RDLOCK is handled by process_mm_walk_lock */
+ break;
+ }
+#endif
+}
+
/**
* walk_page_range - walk page table with caller specific callbacks
* @mm: mm_struct representing the target process of page table walk
@@ -459,7 +486,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
if (!walk.mm)
return -EINVAL;
- mmap_assert_locked(walk.mm);
+ process_mm_walk_lock(walk.mm, ops->walk_lock);
vma = find_vma(walk.mm, start);
do {
@@ -474,6 +501,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
if (ops->pte_hole)
err = ops->pte_hole(start, next, -1, &walk);
} else { /* inside vma */
+ process_vma_walk_lock(vma, ops->walk_lock);
walk.vma = vma;
next = min(end, vma->vm_end);
vma = find_vma(mm, vma->vm_end);
@@ -549,7 +577,8 @@ int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
if (start < vma->vm_start || end > vma->vm_end)
return -EINVAL;
- mmap_assert_locked(walk.mm);
+ process_mm_walk_lock(walk.mm, ops->walk_lock);
+ process_vma_walk_lock(vma, ops->walk_lock);
return __walk_page_range(start, end, &walk);
}
@@ -566,7 +595,8 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
if (!walk.mm)
return -EINVAL;
- mmap_assert_locked(walk.mm);
+ process_mm_walk_lock(walk.mm, ops->walk_lock);
+ process_vma_walk_lock(vma, ops->walk_lock);
return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}
diff --git a/mm/shmem.c b/mm/shmem.c
index f5af4b943e42..d963c747dabc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -806,14 +806,16 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
XA_STATE(xas, &mapping->i_pages, start);
struct page *page;
unsigned long swapped = 0;
+ unsigned long max = end - 1;
rcu_read_lock();
- xas_for_each(&xas, page, end - 1) {
+ xas_for_each(&xas, page, max) {
if (xas_retry(&xas, page))
continue;
if (xa_is_value(page))
swapped++;
-
+ if (xas.xa_index == max)
+ break;
if (need_resched()) {
xas_pause(&xas);
cond_resched_rcu();
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 93cf99aba335..228a4a5312f2 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2979,6 +2979,10 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
free_vm_area(area);
return NULL;
}
+
+ flush_cache_vmap((unsigned long)area->addr,
+ (unsigned long)area->addr + count * PAGE_SIZE);
+
return area->addr;
}
EXPORT_SYMBOL_GPL(vmap_pfn);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1080209a568b..2fe4a11d63f4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4284,6 +4284,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
static const struct mm_walk_ops mm_walk_ops = {
.test_walk = should_skip_vma,
.p4d_entry = walk_pud_range,
+ .walk_lock = PGWALK_RDLOCK,
};
int err;
@@ -4853,16 +4854,17 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
spin_lock_irq(&pgdat->memcg_lru.lock);
- VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
+ if (hlist_nulls_unhashed(&lruvec->lrugen.list))
+ goto unlock;
gen = lruvec->lrugen.gen;
- hlist_nulls_del_rcu(&lruvec->lrugen.list);
+ hlist_nulls_del_init_rcu(&lruvec->lrugen.list);
pgdat->memcg_lru.nr_memcgs[gen]--;
if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
-
+unlock:
spin_unlock_irq(&pgdat->memcg_lru.lock);
}
}
@@ -5434,8 +5436,10 @@ restart:
rcu_read_lock();
hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
- if (op)
+ if (op) {
lru_gen_rotate_memcg(lruvec, op);
+ op = 0;
+ }
mem_cgroup_put(memcg);
@@ -5443,7 +5447,7 @@ restart:
memcg = lruvec_memcg(lruvec);
if (!mem_cgroup_tryget(memcg)) {
- op = 0;
+ lru_gen_release_memcg(memcg);
memcg = NULL;
continue;
}