diff options
-rw-r--r-- | mm/khugepaged.c | 46 |
1 files changed, 37 insertions, 9 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 37d6c4be632f..71ee91db1603 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -526,6 +526,17 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte) } } +static bool is_refcount_suitable(struct page *page) +{ + int expected_refcount; + + expected_refcount = total_mapcount(page); + if (PageSwapCache(page)) + expected_refcount += compound_nr(page); + + return page_count(page) == expected_refcount; +} + static int __collapse_huge_page_isolate(struct vm_area_struct *vma, unsigned long address, pte_t *pte) @@ -578,11 +589,17 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, } /* - * cannot use mapcount: can't collapse if there's a gup pin. - * The page must only be referenced by the scanned process - * and page swap cache. + * Check if the page has any GUP (or other external) pins. + * + * The page table that maps the page has been already unlinked + * from the page table tree and this process cannot get + * an additinal pin on the page. + * + * New pins can come later if the page is shared across fork, + * but not from this process. The other process cannot write to + * the page, only trigger CoW. */ - if (page_count(page) != 1 + PageSwapCache(page)) { + if (!is_refcount_suitable(page)) { unlock_page(page); result = SCAN_PAGE_COUNT; goto out; @@ -669,7 +686,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, } else { src_page = pte_page(pteval); copy_user_highpage(page, src_page, address, vma); - VM_BUG_ON_PAGE(page_mapcount(src_page) != 1, src_page); release_pte_page(src_page); /* * ptl mostly unnecessary, but preempt has to @@ -1221,11 +1237,23 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, } /* - * cannot use mapcount: can't collapse if there's a gup pin. - * The page must only be referenced by the scanned process - * and page swap cache. + * Check if the page has any GUP (or other external) pins. + * + * Here the check is racy it may see totmal_mapcount > refcount + * in some cases. + * For example, one process with one forked child process. + * The parent has the PMD split due to MADV_DONTNEED, then + * the child is trying unmap the whole PMD, but khugepaged + * may be scanning the parent between the child has + * PageDoubleMap flag cleared and dec the mapcount. So + * khugepaged may see total_mapcount > refcount. + * + * But such case is ephemeral we could always retry collapse + * later. However it may report false positive if the page + * has excessive GUP pins (i.e. 512). Anyway the same check + * will be done again later the risk seems low. */ - if (page_count(page) != 1 + PageSwapCache(page)) { + if (!is_refcount_suitable(page)) { result = SCAN_PAGE_COUNT; goto out_unmap; } |