diff options
Diffstat (limited to 'arch/arm64/include/asm/pgtable.h')
| -rw-r--r-- | arch/arm64/include/asm/pgtable.h | 488 |
1 files changed, 306 insertions, 182 deletions
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0b2a2ad1b9e8..4dfa42b7d053 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -30,7 +30,7 @@ #define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/cmpxchg.h> #include <asm/fixmap.h> @@ -40,38 +40,76 @@ #include <linux/sched.h> #include <linux/page_table_check.h> +static inline void emit_pte_barriers(void) +{ + /* + * These barriers are emitted under certain conditions after a pte entry + * was modified (see e.g. __set_pte_complete()). The dsb makes the store + * visible to the table walker. The isb ensures that any previous + * speculative "invalid translation" marker that is in the CPU's + * pipeline gets cleared, so that any access to that address after + * setting the pte to valid won't cause a spurious fault. If the thread + * gets preempted after storing to the pgtable but before emitting these + * barriers, __switch_to() emits a dsb which ensure the walker gets to + * see the store. There is no guarantee of an isb being issued though. + * This is safe because it will still get issued (albeit on a + * potentially different CPU) when the thread starts running again, + * before any access to the address. + */ + dsb(ishst); + isb(); +} + +static inline void queue_pte_barriers(void) +{ + if (is_lazy_mmu_mode_active()) { + /* Avoid the atomic op if already set. */ + if (!test_thread_flag(TIF_LAZY_MMU_PENDING)) + set_thread_flag(TIF_LAZY_MMU_PENDING); + } else { + emit_pte_barriers(); + } +} + +static inline void arch_enter_lazy_mmu_mode(void) {} + +static inline void arch_flush_lazy_mmu_mode(void) +{ + if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING)) + emit_pte_barriers(); +} + +static inline void arch_leave_lazy_mmu_mode(void) +{ + arch_flush_lazy_mmu_mode(); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE /* Set stride and tlb_level in flush_*_tlb_range */ #define flush_pmd_tlb_range(vma, addr, end) \ - __flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2) + __flush_tlb_range(vma, addr, end, PMD_SIZE, 2, TLBF_NONE) #define flush_pud_tlb_range(vma, addr, end) \ - __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) + __flush_tlb_range(vma, addr, end, PUD_SIZE, 1, TLBF_NONE) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * Outside of a few very special situations (e.g. hibernation), we always - * use broadcast TLB invalidation instructions, therefore a spurious page - * fault on one CPU which has been handled concurrently by another CPU - * does not need to perform additional invalidation. + * We use local TLB invalidation instruction when reusing page in + * write protection fault handler to avoid TLBI broadcast in the hot + * path. This will cause spurious page faults if stale read-only TLB + * entries exist. */ -#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) +#define flush_tlb_fix_spurious_fault(vma, address, ptep) \ + __flush_tlb_page(vma, address, TLBF_NOBROADCAST | TLBF_NONOTIFY) -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page)) +#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \ + __flush_tlb_range(vma, address, address + PMD_SIZE, PMD_SIZE, 2, \ + TLBF_NOBROADCAST | TLBF_NONOTIFY | TLBF_NOWALKCACHE) #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e)) -/* - * Macros to convert between a physical address and its placement in a - * page table entry, taking care of 52-bit addresses. - */ #ifdef CONFIG_ARM64_PA_BITS_52 static inline phys_addr_t __pte_to_phys(pte_t pte) { @@ -84,8 +122,15 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK; } #else -#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_LOW) -#define __phys_to_pte_val(phys) (phys) +static inline phys_addr_t __pte_to_phys(pte_t pte) +{ + return pte_val(pte) & PTE_ADDR_LOW; +} + +static inline pteval_t __phys_to_pte_val(phys_addr_t phys) +{ + return phys; +} #endif #define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT) @@ -93,8 +138,6 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pte_none(pte) (!pte_val(pte)) -#define __pte_clear(mm, addr, ptep) \ - __set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) /* @@ -108,7 +151,6 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) -#define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP)) #define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \ PTE_ATTRINDX(MT_NORMAL_TAGGED)) @@ -212,7 +254,8 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) static inline pte_t pte_mkwrite_novma(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + if (pte_sw_dirty(pte)) + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); return pte; } @@ -273,9 +316,11 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } -static inline pte_t pte_mkvalid(pte_t pte) +static inline pte_t pte_mkvalid_k(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_VALID)); + pte = clear_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID)); + pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_VALID_KERNEL)); + return pte; } static inline pte_t pte_mkinvalid(pte_t pte) @@ -290,9 +335,9 @@ static inline pmd_t pmd_mkcont(pmd_t pmd) return __pmd(pmd_val(pmd) | PMD_SECT_CONT); } -static inline pte_t pte_mkdevmap(pte_t pte) +static inline pmd_t pmd_mknoncont(pmd_t pmd) { - return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); + return __pmd(pmd_val(pmd) & ~PMD_SECT_CONT); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP @@ -317,18 +362,20 @@ static inline void __set_pte_nosync(pte_t *ptep, pte_t pte) WRITE_ONCE(*ptep, pte); } -static inline void __set_pte(pte_t *ptep, pte_t pte) +static inline void __set_pte_complete(pte_t pte) { - __set_pte_nosync(ptep, pte); - /* * Only if the new pte is valid and kernel, otherwise TLB maintenance - * or update_mmu_cache() have the necessary barriers. + * has the necessary barriers. */ - if (pte_valid_not_user(pte)) { - dsb(ishst); - isb(); - } + if (pte_valid_not_user(pte)) + queue_pte_barriers(); +} + +static inline void __set_pte(pte_t *ptep, pte_t pte) +{ + __set_pte_nosync(ptep, pte); + __set_pte_complete(pte); } static inline pte_t __ptep_get(pte_t *ptep) @@ -349,7 +396,7 @@ bool pgattr_change_is_safe(pteval_t old, pteval_t new); * 1 0 | 1 0 1 * 1 1 | 0 1 x * - * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via + * When hardware DBM is not present, the software PTE_DIRTY bit is updated via * the page fault mechanism. Checking the dirty status of a pte becomes: * * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) @@ -420,23 +467,6 @@ static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte)); } -static inline void __set_ptes(struct mm_struct *mm, - unsigned long __always_unused addr, - pte_t *ptep, pte_t pte, unsigned int nr) -{ - page_table_check_ptes_set(mm, ptep, pte, nr); - __sync_cache_and_tags(pte, nr); - - for (;;) { - __check_safe_pte_update(mm, ptep, pte); - __set_pte(ptep, pte); - if (--nr == 0) - break; - ptep++; - pte = pte_advance_pfn(pte, 1); - } -} - /* * Hugetlb definitions. */ @@ -483,12 +513,12 @@ static inline pmd_t pte_pmd(pte_t pte) static inline pgprot_t mk_pud_sect_prot(pgprot_t prot) { - return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT); + return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT); } static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot) { - return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT); + return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT); } static inline pte_t pte_swp_mkexclusive(pte_t pte) @@ -496,7 +526,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & PTE_SWP_EXCLUSIVE; } @@ -532,7 +562,7 @@ static inline int pte_protnone(pte_t pte) /* * pte_present_invalid() tells us that the pte is invalid from HW * perspective but present from SW perspective, so the fields are to be - * interpretted as per the HW layout. The second 2 checks are the unique + * interpreted as per the HW layout. The second 2 checks are the unique * encoding that we use for PROT_NONE. It is insufficient to only use * the first check because we share the same encoding scheme with pmds * which support pmd_mkinvalid(), so can be present-invalid without @@ -548,18 +578,6 @@ static inline int pmd_protnone(pmd_t pmd) #endif #define pmd_present(pmd) pte_present(pmd_pte(pmd)) - -/* - * THP definitions. - */ - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int pmd_trans_huge(pmd_t pmd) -{ - return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) @@ -572,6 +590,7 @@ static inline int pmd_trans_huge(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkvalid_k(pmd) pte_pmd(pte_mkvalid_k(pmd_pte(pmd))) #define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd))) #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP #define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd)) @@ -585,14 +604,17 @@ static inline int pmd_trans_huge(pmd_t pmd) #define pmd_write(pmd) pte_write(pmd_pte(pmd)) -#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd)) -#endif -static inline pmd_t pmd_mkdevmap(pmd_t pmd) +static inline pmd_t pmd_mkhuge(pmd_t pmd) { - return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP))); + /* + * It's possible that the pmd is present-invalid on entry + * and in that case it needs to remain present-invalid on + * exit. So ensure the VALID bit does not get modified. + */ + pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID; + pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID; + + return __pmd((pmd_val(pmd) & ~mask) | val); } #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP @@ -607,24 +629,31 @@ static inline pmd_t pmd_mkspecial(pmd_t pmd) #define __phys_to_pmd_val(phys) __phys_to_pte_val(phys) #define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT) #define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) #define pud_young(pud) pte_young(pud_pte(pud)) #define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud))) +#define pud_mkwrite_novma(pud) pte_pud(pte_mkwrite_novma(pud_pte(pud))) +#define pud_mkvalid_k(pud) pte_pud(pte_mkvalid_k(pud_pte(pud))) #define pud_write(pud) pte_write(pud_pte(pud)) -#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT)) +static inline pud_t pud_mkhuge(pud_t pud) +{ + /* + * It's possible that the pud is present-invalid on entry + * and in that case it needs to remain present-invalid on + * exit. So ensure the VALID bit does not get modified. + */ + pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID; + pudval_t val = PUD_TYPE_SECT & ~PTE_VALID; + + return __pud((pud_val(pud) & ~mask) | val); +} #define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud)) #define __phys_to_pud_val(phys) __phys_to_pte_val(phys) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP -#define pud_special(pte) pte_special(pud_pte(pud)) -#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud))) -#endif - #define pmd_pgprot pmd_pgprot static inline pgprot_t pmd_pgprot(pmd_t pmd) { @@ -641,30 +670,63 @@ static inline pgprot_t pud_pgprot(pud_t pud) return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud)); } -static inline void __set_pte_at(struct mm_struct *mm, - unsigned long __always_unused addr, - pte_t *ptep, pte_t pte, unsigned int nr) +static inline void __set_ptes_anysz(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr, + unsigned long pgsize) { - __sync_cache_and_tags(pte, nr); - __check_safe_pte_update(mm, ptep, pte); - __set_pte(ptep, pte); + unsigned long stride = pgsize >> PAGE_SHIFT; + + switch (pgsize) { + case PAGE_SIZE: + page_table_check_ptes_set(mm, addr, ptep, pte, nr); + break; + case PMD_SIZE: + page_table_check_pmds_set(mm, addr, (pmd_t *)ptep, + pte_pmd(pte), nr); + break; +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + page_table_check_puds_set(mm, addr, (pud_t *)ptep, + pte_pud(pte), nr); + break; +#endif + default: + VM_WARN_ON(1); + } + + __sync_cache_and_tags(pte, nr * stride); + + for (;;) { + __check_safe_pte_update(mm, ptep, pte); + __set_pte_nosync(ptep, pte); + if (--nr == 0) + break; + ptep++; + pte = pte_advance_pfn(pte, stride); + } + + __set_pte_complete(pte); } -static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd) +static inline void __set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr) { - page_table_check_pmd_set(mm, pmdp, pmd); - return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd), - PMD_SIZE >> PAGE_SHIFT); + __set_ptes_anysz(mm, addr, ptep, pte, nr, PAGE_SIZE); } -static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pud) +static inline void __set_pmds(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd, unsigned int nr) { - page_table_check_pud_set(mm, pudp, pud); - return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud), - PUD_SIZE >> PAGE_SHIFT); + __set_ptes_anysz(mm, addr, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE); } +#define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1) + +static inline void __set_puds(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud, unsigned int nr) +{ + __set_ptes_anysz(mm, addr, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE); +} +#define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1) #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d)) #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys) @@ -716,26 +778,36 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_TABLE) -#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ - PMD_TYPE_SECT) -#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd)) + +#define pmd_leaf pmd_leaf +static inline bool pmd_leaf(pmd_t pmd) +{ + return pmd_present(pmd) && !pmd_table(pmd); +} + #define pmd_bad(pmd) (!pmd_table(pmd)) #define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE) #define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline int pmd_trans_huge(pmd_t pmd) +{ + /* + * If pmd is present-invalid, pmd_table() won't detect it + * as a table, so force the valid bit for the comparison. + */ + return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID)); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 -static inline bool pud_sect(pud_t pud) { return false; } static inline bool pud_table(pud_t pud) { return true; } #else -#define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ - PUD_TYPE_SECT) #define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_TABLE) #endif -extern pgd_t init_pg_dir[]; -extern pgd_t init_pg_end[]; extern pgd_t swapper_pg_dir[]; extern pgd_t idmap_pg_dir[]; extern pgd_t tramp_pg_dir[]; @@ -760,10 +832,8 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) WRITE_ONCE(*pmdp, pmd); - if (pmd_valid(pmd)) { - dsb(ishst); - isb(); - } + if (pmd_valid(pmd)) + queue_pte_barriers(); } static inline void pmd_clear(pmd_t *pmdp) @@ -793,22 +863,21 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) /* use ONLY for statically allocated translation tables */ #define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - */ -#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot) - #if CONFIG_PGTABLE_LEVELS > 2 #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (!pud_table(pud)) +#define pud_bad(pud) ((pud_val(pud) & PUD_TYPE_MASK) != \ + PUD_TYPE_TABLE) #define pud_present(pud) pte_present(pud_pte(pud)) #ifndef __PAGETABLE_PMD_FOLDED -#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) +#define pud_leaf pud_leaf +static inline bool pud_leaf(pud_t pud) +{ + return pud_present(pud) && !pud_table(pud); +} #else #define pud_leaf(pud) false #endif @@ -827,10 +896,8 @@ static inline void set_pud(pud_t *pudp, pud_t pud) WRITE_ONCE(*pudp, pud); - if (pud_valid(pud)) { - dsb(ishst); - isb(); - } + if (pud_valid(pud)) + queue_pte_barriers(); } static inline void pud_clear(pud_t *pudp) @@ -896,7 +963,9 @@ static inline bool mm_pud_folded(const struct mm_struct *mm) pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) #define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d)) -#define p4d_bad(p4d) (pgtable_l4_enabled() && !(p4d_val(p4d) & P4D_TABLE_BIT)) +#define p4d_bad(p4d) (pgtable_l4_enabled() && \ + ((p4d_val(p4d) & P4D_TYPE_MASK) != \ + P4D_TYPE_TABLE)) #define p4d_present(p4d) (!p4d_none(p4d)) static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) @@ -907,8 +976,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) } WRITE_ONCE(*p4dp, p4d); - dsb(ishst); - isb(); + queue_pte_barriers(); } static inline void p4d_clear(p4d_t *p4dp) @@ -1023,7 +1091,9 @@ static inline bool mm_p4d_folded(const struct mm_struct *mm) pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e)) #define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd)) -#define pgd_bad(pgd) (pgtable_l5_enabled() && !(pgd_val(pgd) & PGD_TABLE_BIT)) +#define pgd_bad(pgd) (pgtable_l5_enabled() && \ + ((pgd_val(pgd) & PGD_TYPE_MASK) != \ + PGD_TYPE_TABLE)) #define pgd_present(pgd) (!pgd_none(pgd)) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) @@ -1034,8 +1104,7 @@ static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) } WRITE_ONCE(*pgdp, pgd); - dsb(ishst); - isb(); + queue_pte_barriers(); } static inline void pgd_clear(pgd_t *pgdp) @@ -1182,9 +1251,18 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); } -extern int __ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, - pte_t entry, int dirty); +extern int __ptep_set_access_flags_anysz(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty, + unsigned long pgsize); + +static inline int __ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + return __ptep_set_access_flags_anysz(vma, address, ptep, entry, dirty, + PAGE_SIZE); +} #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS @@ -1192,33 +1270,23 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { - return __ptep_set_access_flags(vma, address, (pte_t *)pmdp, - pmd_pte(entry), dirty); -} - -static inline int pud_devmap(pud_t pud) -{ - return 0; -} - -static inline int pgd_devmap(pgd_t pgd) -{ - return 0; + return __ptep_set_access_flags_anysz(vma, address, (pte_t *)pmdp, + pmd_pte(entry), dirty, PMD_SIZE); } #endif #ifdef CONFIG_PAGE_TABLE_CHECK -static inline bool pte_user_accessible_page(pte_t pte) +static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte) { return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte)); } -static inline bool pmd_user_accessible_page(pmd_t pmd) +static inline bool pmd_user_accessible_page(struct mm_struct *mm, unsigned long addr, pmd_t pmd) { return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } -static inline bool pud_user_accessible_page(pud_t pud) +static inline bool pud_user_accessible_page(struct mm_struct *mm, unsigned long addr, pud_t pud) { return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud)); } @@ -1227,9 +1295,15 @@ static inline bool pud_user_accessible_page(pud_t pud) /* * Atomic pte/pmd modifications. */ -static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pte_t *ptep) + +static inline void __pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + __set_pte(ptep, __pte(0)); +} + +static inline bool __ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { pte_t old_pte, pte; @@ -1244,10 +1318,10 @@ static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma, return pte_young(pte); } -static inline int __ptep_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) +static inline bool __ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { - int young = __ptep_test_and_clear_young(vma, address, ptep); + bool young = __ptep_test_and_clear_young(vma, address, ptep); if (young) { /* @@ -1258,7 +1332,7 @@ static inline int __ptep_clear_flush_young(struct vm_area_struct *vma, * context-switch, which provides a DSB to complete the TLB * invalidation. */ - flush_tlb_page_nosync(vma, address); + __flush_tlb_page(vma, address, TLBF_NOSYNC); } return young; @@ -1266,9 +1340,8 @@ static inline int __ptep_clear_flush_young(struct vm_area_struct *vma, #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pmd_t *pmdp) +static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) { /* Operation applies to PMD table entry only if FEAT_HAFT is enabled */ VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft()); @@ -1276,16 +1349,38 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ -static inline pte_t __ptep_get_and_clear(struct mm_struct *mm, - unsigned long address, pte_t *ptep) +static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm, + unsigned long address, + pte_t *ptep, + unsigned long pgsize) { pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0)); - page_table_check_pte_clear(mm, pte); + switch (pgsize) { + case PAGE_SIZE: + page_table_check_pte_clear(mm, address, pte); + break; + case PMD_SIZE: + page_table_check_pmd_clear(mm, address, pte_pmd(pte)); + break; +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + page_table_check_pud_clear(mm, address, pte_pud(pte)); + break; +#endif + default: + VM_WARN_ON(1); + } return pte; } +static inline pte_t __ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + return __ptep_get_and_clear_anysz(mm, address, ptep, PAGE_SIZE); +} + static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned int nr, int full) { @@ -1322,11 +1417,7 @@ static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm, static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0)); - - page_table_check_pmd_clear(mm, pmd); - - return pmd; + return pte_pmd(__ptep_get_and_clear_anysz(mm, address, (pte_t *)pmdp, PMD_SIZE)); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -1415,7 +1506,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { - page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); + page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd))); } #endif @@ -1519,6 +1610,14 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, */ #define arch_wants_old_prefaulted_pte cpu_has_hw_af +/* + * Request exec memory is read into pagecache in at least 64K folios. This size + * can be contpte-mapped when 4K base pages are in use (16 pages into 1 iTLB + * entry), and HPA can coalesce it (4 pages into 1 TLB entry) when 16K base + * pages are in use. + */ +#define exec_folio_order() ilog2(SZ_64K >> PAGE_SHIFT) + static inline bool pud_sect_supported(void) { return PAGE_SIZE == SZ_4K; @@ -1535,6 +1634,16 @@ extern void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t new_pte); +#define modify_prot_start_ptes modify_prot_start_ptes +extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr); + +#define modify_prot_commit_ptes modify_prot_commit_ptes +extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte, + unsigned int nr); + #ifdef CONFIG_ARM64_CONTPTE /* @@ -1555,10 +1664,10 @@ extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr, extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned int nr, int full); -extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep); -extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep); +bool contpte_test_and_clear_young_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr); +bool contpte_clear_flush_young_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr); extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned int nr); extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma, @@ -1721,28 +1830,43 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, return __ptep_get_and_clear(mm, addr, ptep); } -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +#define test_and_clear_young_ptes test_and_clear_young_ptes +static inline bool test_and_clear_young_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr) { - pte_t orig_pte = __ptep_get(ptep); - - if (likely(!pte_valid_cont(orig_pte))) + if (likely(nr == 1 && !pte_cont(__ptep_get(ptep)))) return __ptep_test_and_clear_young(vma, addr, ptep); - return contpte_ptep_test_and_clear_young(vma, addr, ptep); + return contpte_test_and_clear_young_ptes(vma, addr, ptep, nr); +} + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + return test_and_clear_young_ptes(vma, addr, ptep, 1); } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -static inline int ptep_clear_flush_young(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +static inline bool ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { pte_t orig_pte = __ptep_get(ptep); if (likely(!pte_valid_cont(orig_pte))) return __ptep_clear_flush_young(vma, addr, ptep); - return contpte_ptep_clear_flush_young(vma, addr, ptep); + return contpte_clear_flush_young_ptes(vma, addr, ptep, 1); +} + +#define clear_flush_young_ptes clear_flush_young_ptes +static inline bool clear_flush_young_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr) +{ + if (likely(nr == 1 && !pte_cont(__ptep_get(ptep)))) + return __ptep_clear_flush_young(vma, addr, ptep); + + return contpte_clear_flush_young_ptes(vma, addr, ptep, nr); } #define wrprotect_ptes wrprotect_ptes @@ -1825,6 +1949,6 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, #endif /* CONFIG_ARM64_CONTPTE */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_PGTABLE_H */ |
