Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c  |  69
1 file changed, 38 insertions(+), 31 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f42bb51e023a..7bf2299cb24b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -70,9 +70,8 @@ static atomic_t huge_zero_refcount;
 struct page *huge_zero_page __read_mostly;
 unsigned long huge_zero_pfn __read_mostly = ~0UL;
 
-bool hugepage_vma_check(struct vm_area_struct *vma,
-			unsigned long vm_flags,
-			bool smaps, bool in_pf)
+bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags,
+			bool smaps, bool in_pf, bool enforce_sysfs)
 {
 	if (!vma->vm_mm)		/* vdso */
 		return false;
@@ -121,11 +120,10 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
 	if (!in_pf && shmem_file(vma->vm_file))
 		return shmem_huge_enabled(vma);
 
-	if (!hugepage_flags_enabled())
-		return false;
-
-	/* THP settings require madvise. */
-	if (!(vm_flags & VM_HUGEPAGE) && !hugepage_flags_always())
+	/* Enforce sysfs THP requirements as necessary */
+	if (enforce_sysfs &&
+	    (!hugepage_flags_enabled() || (!(vm_flags & VM_HUGEPAGE) &&
+					   !hugepage_flags_always())))
 		return false;
 
 	/* Only regular file is valid */
@@ -772,8 +770,7 @@ static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 		return;
 	entry = mk_pmd(zero_page, vma->vm_page_prot);
 	entry = pmd_mkhuge(entry);
-	if (pgtable)
-		pgtable_trans_huge_deposit(mm, pmd, pgtable);
+	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, haddr, pmd, entry);
 	mm_inc_nr_ptes(mm);
 }
@@ -1479,7 +1476,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	struct page *page;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	int page_nid = NUMA_NO_NODE;
-	int target_nid, last_cpupid = -1;
+	int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK);
 	bool migrated = false;
 	bool was_writable = pmd_savedwrite(oldpmd);
 	int flags = 0;
@@ -1500,7 +1497,12 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 		flags |= TNF_NO_GROUP;
 
 	page_nid = page_to_nid(page);
-	last_cpupid = page_cpupid_last(page);
+	/*
+	 * For memory tiering mode, cpupid of slow memory page is used
+	 * to record page access time. So use default value.
+	 */
+	if (node_is_toptier(page_nid))
+		last_cpupid = page_cpupid_last(page);
 	target_nid = numa_migrate_prep(page, vma, haddr, page_nid,
 				       &flags);
 
@@ -1824,6 +1826,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
 	if (prot_numa) {
 		struct page *page;
+		bool toptier;
 		/*
 		 * Avoid trapping faults against the zero page. The read-only
 		 * data is likely to be read-cached on the local CPU and
@@ -1836,13 +1839,18 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			goto unlock;
 
 		page = pmd_page(*pmd);
+		toptier = node_is_toptier(page_to_nid(page));
 		/*
 		 * Skip scanning top tier node if normal numa
 		 * balancing is disabled
 		 */
 		if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
-		    node_is_toptier(page_to_nid(page)))
+		    toptier)
 			goto unlock;
+
+		if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING &&
+		    !toptier)
+			xchg_page_access_time(page, jiffies_to_msecs(jiffies));
 	}
 	/*
 	 * In case prot_numa, we are under mmap_read_lock(mm). It's critical
@@ -2140,6 +2148,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		 *
 		 * In case we cannot clear PageAnonExclusive(), split the PMD
 		 * only and let try_to_migrate_one() fail later.
+		 *
+		 * See page_try_share_anon_rmap(): invalidate PMD first.
 		 */
 		anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
 		if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
@@ -2288,25 +2298,11 @@ out:
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 		bool freeze, struct folio *folio)
 {
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	pgd = pgd_offset(vma->vm_mm, address);
-	if (!pgd_present(*pgd))
-		return;
+	pmd_t *pmd = mm_find_pmd(vma->vm_mm, address);
 
-	p4d = p4d_offset(pgd, address);
-	if (!p4d_present(*p4d))
+	if (!pmd)
 		return;
 
-	pud = pud_offset(p4d, address);
-	if (!pud_present(*pud))
-		return;
-
-	pmd = pmd_offset(pud, address);
-
 	__split_huge_pmd(vma, pmd, address, freeze, folio);
 }
 
@@ -2649,6 +2645,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		mapping = NULL;
 		anon_vma_lock_write(anon_vma);
 	} else {
+		gfp_t gfp;
+
 		mapping = head->mapping;
 
 		/* Truncated ? */
@@ -2657,8 +2655,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			goto out;
 		}
 
-		xas_split_alloc(&xas, head, compound_order(head),
-				mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK);
+		gfp = current_gfp_context(mapping_gfp_mask(mapping) &
							GFP_RECLAIM_MASK);
+
+		if (folio_test_private(folio) &&
+				!filemap_release_folio(folio, gfp)) {
+			ret = -EBUSY;
+			goto out;
+		}
+
+		xas_split_alloc(&xas, head, compound_order(head), gfp);
 		if (xas_error(&xas)) {
 			ret = xas_error(&xas);
 			goto out;
@@ -3175,6 +3181,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
 	flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
 	pmdval = pmdp_invalidate(vma, address, pvmw->pmd);
 
+	/* See page_try_share_anon_rmap(): invalidate PMD first. */
 	anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
 	if (anon_exclusive && page_try_share_anon_rmap(page)) {
 		set_pmd_at(mm, address, pvmw->pmd, pmdval);