diff options
Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r-- | mm/khugepaged.c | 45 |
1 files changed, 31 insertions, 14 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index bad1e130eda8..5f0be134141e 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -948,17 +948,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, return SCAN_SUCCEED; } -static int find_pmd_or_thp_or_none(struct mm_struct *mm, - unsigned long address, - pmd_t **pmd) +static inline int check_pmd_state(pmd_t *pmd) { - pmd_t pmde; + pmd_t pmde = pmdp_get_lockless(pmd); - *pmd = mm_find_pmd(mm, address); - if (!*pmd) - return SCAN_PMD_NULL; - - pmde = pmdp_get_lockless(*pmd); if (pmd_none(pmde)) return SCAN_PMD_NONE; if (!pmd_present(pmde)) @@ -972,6 +965,17 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm, return SCAN_SUCCEED; } +static int find_pmd_or_thp_or_none(struct mm_struct *mm, + unsigned long address, + pmd_t **pmd) +{ + *pmd = mm_find_pmd(mm, address); + if (!*pmd) + return SCAN_PMD_NULL; + + return check_pmd_state(*pmd); +} + static int check_pmd_still_valid(struct mm_struct *mm, unsigned long address, pmd_t *pmd) @@ -1721,7 +1725,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) pmd_t *pmd, pgt_pmd; spinlock_t *pml; spinlock_t *ptl; - bool skipped_uffd = false; + bool success = false; /* * Check vma->anon_vma to exclude MAP_PRIVATE mappings that @@ -1758,6 +1762,19 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) mmu_notifier_invalidate_range_start(&range); pml = pmd_lock(mm, pmd); + /* + * The lock of new_folio is still held, we will be blocked in + * the page fault path, which prevents the pte entries from + * being set again. So even though the old empty PTE page may be + * concurrently freed and a new PTE page is filled into the pmd + * entry, it is still empty and can be removed. + * + * So here we only need to recheck if the state of pmd entry + * still meets our requirements, rather than checking pmd_same() + * like elsewhere. + */ + if (check_pmd_state(pmd) != SCAN_SUCCEED) + goto drop_pml; ptl = pte_lockptr(mm, pmd); if (ptl != pml) spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); @@ -1771,20 +1788,20 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * repeating the anon_vma check protects from one category, * and repeating the userfaultfd_wp() check from another. */ - if (unlikely(vma->anon_vma || userfaultfd_wp(vma))) { - skipped_uffd = true; - } else { + if (likely(!vma->anon_vma && !userfaultfd_wp(vma))) { pgt_pmd = pmdp_collapse_flush(vma, addr, pmd); pmdp_get_lockless_sync(); + success = true; } if (ptl != pml) spin_unlock(ptl); +drop_pml: spin_unlock(pml); mmu_notifier_invalidate_range_end(&range); - if (!skipped_uffd) { + if (success) { mm_dec_nr_ptes(mm); page_table_check_pte_clear_range(mm, addr, pgt_pmd); pte_free_defer(mm, pmd_pgtable(pgt_pmd)); |