summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Huang Ying <ying.huang@intel.com> 2018-06-07 17:06:34 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-06-07 17:34:35 -0700
commit: 688272809fcce5b17fcefd5892b59f3788efb144 (patch)
tree: 4f77743f058101bbde54ba77261984379c8ba822
parent: 7854207fe9545181b048df4e684def36306a86ec (diff)
download: lwn-688272809fcce5b17fcefd5892b59f3788efb144.tar.gz
lwn-688272809fcce5b17fcefd5892b59f3788efb144.zip
mm, gup: prevent pmd checking race in follow_pmd_mask()
mmap_sem will be read locked when calling follow_pmd_mask(). But this cannot prevent the PMD from being changed in all cases when the PTL is unlocked, for example, from pmd_trans_huge() to pmd_none() via MADV_DONTNEED. So it is possible for the pmd_present() check in follow_pmd_mask() to encounter an invalid PMD. This may cause an incorrect VM_BUG_ON() or an infinite loop. Fix this by reading the PMD entry into a local variable with READ_ONCE() and checking the local variable and pmd_none() in the retry loop. As Kirill pointed out, with the PTL unlocked, *pmd may be changed under us, so reading it directly again and again may cause weird bugs. So although using *pmd directly for checks other than pmd_present() may be safe, it is still better to replace those uses by reading *pmd once and checking the local variable multiple times. Replacing all uses of *pmd with the local variable while the PTL is unlocked was suggested by Kirill. Link: http://lkml.kernel.org/r/20180419083514.1365-1-ying.huang@intel.com Signed-off-by: "Huang, Ying" <ying.huang@intel.com> Reviewed-by: Zi Yan <zi.yan@cs.rutgers.edu> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Cc: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/gup.c 38
1 files changed, 27 insertions, 11 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 010153989b9b..1020c7f8f5ee 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -212,53 +212,69 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
unsigned long address, pud_t *pudp,
unsigned int flags, unsigned int *page_mask)
{
- pmd_t *pmd;
+ pmd_t *pmd, pmdval;
spinlock_t *ptl;
struct page *page;
struct mm_struct *mm = vma->vm_mm;
pmd = pmd_offset(pudp, address);
- if (pmd_none(*pmd))
+ /*
+ * The READ_ONCE() will stabilize the pmdval in a register or
+ * on the stack so that it will stop changing under the code.
+ */
+ pmdval = READ_ONCE(*pmd);
+ if (pmd_none(pmdval))
return no_page_table(vma, flags);
- if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
+ if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) {
page = follow_huge_pmd(mm, address, pmd, flags);
if (page)
return page;
return no_page_table(vma, flags);
}
- if (is_hugepd(__hugepd(pmd_val(*pmd)))) {
+ if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
page = follow_huge_pd(vma, address,
- __hugepd(pmd_val(*pmd)), flags,
+ __hugepd(pmd_val(pmdval)), flags,
PMD_SHIFT);
if (page)
return page;
return no_page_table(vma, flags);
}
retry:
- if (!pmd_present(*pmd)) {
+ if (!pmd_present(pmdval)) {
if (likely(!(flags & FOLL_MIGRATION)))
return no_page_table(vma, flags);
VM_BUG_ON(thp_migration_supported() &&
- !is_pmd_migration_entry(*pmd));
- if (is_pmd_migration_entry(*pmd))
+ !is_pmd_migration_entry(pmdval));
+ if (is_pmd_migration_entry(pmdval))
pmd_migration_entry_wait(mm, pmd);
+ pmdval = READ_ONCE(*pmd);
+ /*
+ * MADV_DONTNEED may convert the pmd to null because
+ * mmap_sem is held in read mode
+ */
+ if (pmd_none(pmdval))
+ return no_page_table(vma, flags);
goto retry;
}
- if (pmd_devmap(*pmd)) {
+ if (pmd_devmap(pmdval)) {
ptl = pmd_lock(mm, pmd);
page = follow_devmap_pmd(vma, address, pmd, flags);
spin_unlock(ptl);
if (page)
return page;
}
- if (likely(!pmd_trans_huge(*pmd)))
+ if (likely(!pmd_trans_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags);
- if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
+ if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
return no_page_table(vma, flags);
retry_locked:
ptl = pmd_lock(mm, pmd);
+ if (unlikely(pmd_none(*pmd))) {
+ spin_unlock(ptl);
+ return no_page_table(vma, flags);
+ }
if (unlikely(!pmd_present(*pmd))) {
spin_unlock(ptl);
if (likely(!(flags & FOLL_MIGRATION)))