summaryrefslogtreecommitdiff
path: root/arch/arm64/include/asm/pgtable.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/include/asm/pgtable.h')
-rw-r--r--arch/arm64/include/asm/pgtable.h488
1 files changed, 306 insertions, 182 deletions
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0b2a2ad1b9e8..4dfa42b7d053 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -30,7 +30,7 @@
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
@@ -40,38 +40,76 @@
#include <linux/sched.h>
#include <linux/page_table_check.h>
+static inline void emit_pte_barriers(void)
+{
+ /*
+ * These barriers are emitted under certain conditions after a pte entry
+ * was modified (see e.g. __set_pte_complete()). The dsb makes the store
+ * visible to the table walker. The isb ensures that any previous
+ * speculative "invalid translation" marker that is in the CPU's
+ * pipeline gets cleared, so that any access to that address after
+ * setting the pte to valid won't cause a spurious fault. If the thread
+ * gets preempted after storing to the pgtable but before emitting these
+ * barriers, __switch_to() emits a dsb which ensure the walker gets to
+ * see the store. There is no guarantee of an isb being issued though.
+ * This is safe because it will still get issued (albeit on a
+ * potentially different CPU) when the thread starts running again,
+ * before any access to the address.
+ */
+ dsb(ishst);
+ isb();
+}
+
+static inline void queue_pte_barriers(void)
+{
+ if (is_lazy_mmu_mode_active()) {
+ /* Avoid the atomic op if already set. */
+ if (!test_thread_flag(TIF_LAZY_MMU_PENDING))
+ set_thread_flag(TIF_LAZY_MMU_PENDING);
+ } else {
+ emit_pte_barriers();
+ }
+}
+
+static inline void arch_enter_lazy_mmu_mode(void) {}
+
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+ if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING))
+ emit_pte_barriers();
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+ arch_flush_lazy_mmu_mode();
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
/* Set stride and tlb_level in flush_*_tlb_range */
#define flush_pmd_tlb_range(vma, addr, end) \
- __flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
+ __flush_tlb_range(vma, addr, end, PMD_SIZE, 2, TLBF_NONE)
#define flush_pud_tlb_range(vma, addr, end) \
- __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
+ __flush_tlb_range(vma, addr, end, PUD_SIZE, 1, TLBF_NONE)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * Outside of a few very special situations (e.g. hibernation), we always
- * use broadcast TLB invalidation instructions, therefore a spurious page
- * fault on one CPU which has been handled concurrently by another CPU
- * does not need to perform additional invalidation.
+ * We use local TLB invalidation instruction when reusing page in
+ * write protection fault handler to avoid TLBI broadcast in the hot
+ * path. This will cause spurious page faults if stale read-only TLB
+ * entries exist.
*/
-#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0)
+#define flush_tlb_fix_spurious_fault(vma, address, ptep) \
+ __flush_tlb_page(vma, address, TLBF_NOBROADCAST | TLBF_NONOTIFY)
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page))
+#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \
+ __flush_tlb_range(vma, address, address + PMD_SIZE, PMD_SIZE, 2, \
+ TLBF_NOBROADCAST | TLBF_NONOTIFY | TLBF_NOWALKCACHE)
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
-/*
- * Macros to convert between a physical address and its placement in a
- * page table entry, taking care of 52-bit addresses.
- */
#ifdef CONFIG_ARM64_PA_BITS_52
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
@@ -84,8 +122,15 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}
#else
-#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_LOW)
-#define __phys_to_pte_val(phys) (phys)
+static inline phys_addr_t __pte_to_phys(pte_t pte)
+{
+ return pte_val(pte) & PTE_ADDR_LOW;
+}
+
+static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
+{
+ return phys;
+}
#endif
#define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
@@ -93,8 +138,6 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pte_none(pte) (!pte_val(pte))
-#define __pte_clear(mm, addr, ptep) \
- __set_pte(ptep, __pte(0))
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
/*
@@ -108,7 +151,6 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
#define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
-#define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
#define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \
PTE_ATTRINDX(MT_NORMAL_TAGGED))
@@ -212,7 +254,8 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
- pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+ if (pte_sw_dirty(pte))
+ pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
return pte;
}
@@ -273,9 +316,11 @@ static inline pte_t pte_mknoncont(pte_t pte)
return clear_pte_bit(pte, __pgprot(PTE_CONT));
}
-static inline pte_t pte_mkvalid(pte_t pte)
+static inline pte_t pte_mkvalid_k(pte_t pte)
{
- return set_pte_bit(pte, __pgprot(PTE_VALID));
+ pte = clear_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID));
+ pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_VALID_KERNEL));
+ return pte;
}
static inline pte_t pte_mkinvalid(pte_t pte)
@@ -290,9 +335,9 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
}
-static inline pte_t pte_mkdevmap(pte_t pte)
+static inline pmd_t pmd_mknoncont(pmd_t pmd)
{
- return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
+ return __pmd(pmd_val(pmd) & ~PMD_SECT_CONT);
}
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
@@ -317,18 +362,20 @@ static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
WRITE_ONCE(*ptep, pte);
}
-static inline void __set_pte(pte_t *ptep, pte_t pte)
+static inline void __set_pte_complete(pte_t pte)
{
- __set_pte_nosync(ptep, pte);
-
/*
* Only if the new pte is valid and kernel, otherwise TLB maintenance
- * or update_mmu_cache() have the necessary barriers.
+ * has the necessary barriers.
*/
- if (pte_valid_not_user(pte)) {
- dsb(ishst);
- isb();
- }
+ if (pte_valid_not_user(pte))
+ queue_pte_barriers();
+}
+
+static inline void __set_pte(pte_t *ptep, pte_t pte)
+{
+ __set_pte_nosync(ptep, pte);
+ __set_pte_complete(pte);
}
static inline pte_t __ptep_get(pte_t *ptep)
@@ -349,7 +396,7 @@ bool pgattr_change_is_safe(pteval_t old, pteval_t new);
* 1 0 | 1 0 1
* 1 1 | 0 1 x
*
- * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via
+ * When hardware DBM is not present, the software PTE_DIRTY bit is updated via
* the page fault mechanism. Checking the dirty status of a pte becomes:
*
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
@@ -420,23 +467,6 @@ static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
}
-static inline void __set_ptes(struct mm_struct *mm,
- unsigned long __always_unused addr,
- pte_t *ptep, pte_t pte, unsigned int nr)
-{
- page_table_check_ptes_set(mm, ptep, pte, nr);
- __sync_cache_and_tags(pte, nr);
-
- for (;;) {
- __check_safe_pte_update(mm, ptep, pte);
- __set_pte(ptep, pte);
- if (--nr == 0)
- break;
- ptep++;
- pte = pte_advance_pfn(pte, 1);
- }
-}
-
/*
* Hugetlb definitions.
*/
@@ -483,12 +513,12 @@ static inline pmd_t pte_pmd(pte_t pte)
static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
{
- return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT);
+ return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT);
}
static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
{
- return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
+ return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
}
static inline pte_t pte_swp_mkexclusive(pte_t pte)
@@ -496,7 +526,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & PTE_SWP_EXCLUSIVE;
}
@@ -532,7 +562,7 @@ static inline int pte_protnone(pte_t pte)
/*
* pte_present_invalid() tells us that the pte is invalid from HW
* perspective but present from SW perspective, so the fields are to be
- * interpretted as per the HW layout. The second 2 checks are the unique
+ * interpreted as per the HW layout. The second 2 checks are the unique
* encoding that we use for PROT_NONE. It is insufficient to only use
* the first check because we share the same encoding scheme with pmds
* which support pmd_mkinvalid(), so can be present-invalid without
@@ -548,18 +578,6 @@ static inline int pmd_protnone(pmd_t pmd)
#endif
#define pmd_present(pmd) pte_present(pmd_pte(pmd))
-
-/*
- * THP definitions.
- */
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int pmd_trans_huge(pmd_t pmd)
-{
- return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
@@ -572,6 +590,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkvalid_k(pmd) pte_pmd(pte_mkvalid_k(pmd_pte(pmd)))
#define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
#define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd))
@@ -585,14 +604,17 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
-#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd))
-#endif
-static inline pmd_t pmd_mkdevmap(pmd_t pmd)
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
- return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP)));
+ /*
+ * It's possible that the pmd is present-invalid on entry
+ * and in that case it needs to remain present-invalid on
+ * exit. So ensure the VALID bit does not get modified.
+ */
+ pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID;
+ pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID;
+
+ return __pmd((pmd_val(pmd) & ~mask) | val);
}
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
@@ -607,24 +629,31 @@ static inline pmd_t pmd_mkspecial(pmd_t pmd)
#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys)
#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#define pud_young(pud) pte_young(pud_pte(pud))
#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
+#define pud_mkwrite_novma(pud) pte_pud(pte_mkwrite_novma(pud_pte(pud)))
+#define pud_mkvalid_k(pud) pte_pud(pte_mkvalid_k(pud_pte(pud)))
#define pud_write(pud) pte_write(pud_pte(pud))
-#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT))
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+ /*
+ * It's possible that the pud is present-invalid on entry
+ * and in that case it needs to remain present-invalid on
+ * exit. So ensure the VALID bit does not get modified.
+ */
+ pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID;
+ pudval_t val = PUD_TYPE_SECT & ~PTE_VALID;
+
+ return __pud((pud_val(pud) & ~mask) | val);
+}
#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
-#define pud_special(pte) pte_special(pud_pte(pud))
-#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud)))
-#endif
-
#define pmd_pgprot pmd_pgprot
static inline pgprot_t pmd_pgprot(pmd_t pmd)
{
@@ -641,30 +670,63 @@ static inline pgprot_t pud_pgprot(pud_t pud)
return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
}
-static inline void __set_pte_at(struct mm_struct *mm,
- unsigned long __always_unused addr,
- pte_t *ptep, pte_t pte, unsigned int nr)
+static inline void __set_ptes_anysz(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr,
+ unsigned long pgsize)
{
- __sync_cache_and_tags(pte, nr);
- __check_safe_pte_update(mm, ptep, pte);
- __set_pte(ptep, pte);
+ unsigned long stride = pgsize >> PAGE_SHIFT;
+
+ switch (pgsize) {
+ case PAGE_SIZE:
+ page_table_check_ptes_set(mm, addr, ptep, pte, nr);
+ break;
+ case PMD_SIZE:
+ page_table_check_pmds_set(mm, addr, (pmd_t *)ptep,
+ pte_pmd(pte), nr);
+ break;
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ page_table_check_puds_set(mm, addr, (pud_t *)ptep,
+ pte_pud(pte), nr);
+ break;
+#endif
+ default:
+ VM_WARN_ON(1);
+ }
+
+ __sync_cache_and_tags(pte, nr * stride);
+
+ for (;;) {
+ __check_safe_pte_update(mm, ptep, pte);
+ __set_pte_nosync(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte = pte_advance_pfn(pte, stride);
+ }
+
+ __set_pte_complete(pte);
}
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
+static inline void __set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
{
- page_table_check_pmd_set(mm, pmdp, pmd);
- return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd),
- PMD_SIZE >> PAGE_SHIFT);
+ __set_ptes_anysz(mm, addr, ptep, pte, nr, PAGE_SIZE);
}
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
+static inline void __set_pmds(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd, unsigned int nr)
{
- page_table_check_pud_set(mm, pudp, pud);
- return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
- PUD_SIZE >> PAGE_SHIFT);
+ __set_ptes_anysz(mm, addr, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE);
}
+#define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1)
+
+static inline void __set_puds(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud, unsigned int nr)
+{
+ __set_ptes_anysz(mm, addr, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE);
+}
+#define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1)
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
@@ -716,26 +778,36 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_TABLE)
-#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
- PMD_TYPE_SECT)
-#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd))
+
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
+{
+ return pmd_present(pmd) && !pmd_table(pmd);
+}
+
#define pmd_bad(pmd) (!pmd_table(pmd))
#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ /*
+ * If pmd is present-invalid, pmd_table() won't detect it
+ * as a table, so force the valid bit for the comparison.
+ */
+ return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
-static inline bool pud_sect(pud_t pud) { return false; }
static inline bool pud_table(pud_t pud) { return true; }
#else
-#define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
- PUD_TYPE_SECT)
#define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
PUD_TYPE_TABLE)
#endif
-extern pgd_t init_pg_dir[];
-extern pgd_t init_pg_end[];
extern pgd_t swapper_pg_dir[];
extern pgd_t idmap_pg_dir[];
extern pgd_t tramp_pg_dir[];
@@ -760,10 +832,8 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
WRITE_ONCE(*pmdp, pmd);
- if (pmd_valid(pmd)) {
- dsb(ishst);
- isb();
- }
+ if (pmd_valid(pmd))
+ queue_pte_barriers();
}
static inline void pmd_clear(pmd_t *pmdp)
@@ -793,22 +863,21 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
/* use ONLY for statically allocated translation tables */
#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot)
-
#if CONFIG_PGTABLE_LEVELS > 2
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!pud_table(pud))
+#define pud_bad(pud) ((pud_val(pud) & PUD_TYPE_MASK) != \
+ PUD_TYPE_TABLE)
#define pud_present(pud) pte_present(pud_pte(pud))
#ifndef __PAGETABLE_PMD_FOLDED
-#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
+{
+ return pud_present(pud) && !pud_table(pud);
+}
#else
#define pud_leaf(pud) false
#endif
@@ -827,10 +896,8 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
WRITE_ONCE(*pudp, pud);
- if (pud_valid(pud)) {
- dsb(ishst);
- isb();
- }
+ if (pud_valid(pud))
+ queue_pte_barriers();
}
static inline void pud_clear(pud_t *pudp)
@@ -896,7 +963,9 @@ static inline bool mm_pud_folded(const struct mm_struct *mm)
pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d))
-#define p4d_bad(p4d) (pgtable_l4_enabled() && !(p4d_val(p4d) & P4D_TABLE_BIT))
+#define p4d_bad(p4d) (pgtable_l4_enabled() && \
+ ((p4d_val(p4d) & P4D_TYPE_MASK) != \
+ P4D_TYPE_TABLE))
#define p4d_present(p4d) (!p4d_none(p4d))
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
@@ -907,8 +976,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
}
WRITE_ONCE(*p4dp, p4d);
- dsb(ishst);
- isb();
+ queue_pte_barriers();
}
static inline void p4d_clear(p4d_t *p4dp)
@@ -1023,7 +1091,9 @@ static inline bool mm_p4d_folded(const struct mm_struct *mm)
pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd))
-#define pgd_bad(pgd) (pgtable_l5_enabled() && !(pgd_val(pgd) & PGD_TABLE_BIT))
+#define pgd_bad(pgd) (pgtable_l5_enabled() && \
+ ((pgd_val(pgd) & PGD_TYPE_MASK) != \
+ PGD_TYPE_TABLE))
#define pgd_present(pgd) (!pgd_none(pgd))
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -1034,8 +1104,7 @@ static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
}
WRITE_ONCE(*pgdp, pgd);
- dsb(ishst);
- isb();
+ queue_pte_barriers();
}
static inline void pgd_clear(pgd_t *pgdp)
@@ -1182,9 +1251,18 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}
-extern int __ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep,
- pte_t entry, int dirty);
+extern int __ptep_set_access_flags_anysz(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty,
+ unsigned long pgsize);
+
+static inline int __ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ return __ptep_set_access_flags_anysz(vma, address, ptep, entry, dirty,
+ PAGE_SIZE);
+}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
@@ -1192,33 +1270,23 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty)
{
- return __ptep_set_access_flags(vma, address, (pte_t *)pmdp,
- pmd_pte(entry), dirty);
-}
-
-static inline int pud_devmap(pud_t pud)
-{
- return 0;
-}
-
-static inline int pgd_devmap(pgd_t pgd)
-{
- return 0;
+ return __ptep_set_access_flags_anysz(vma, address, (pte_t *)pmdp,
+ pmd_pte(entry), dirty, PMD_SIZE);
}
#endif
#ifdef CONFIG_PAGE_TABLE_CHECK
-static inline bool pte_user_accessible_page(pte_t pte)
+static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte)
{
return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte));
}
-static inline bool pmd_user_accessible_page(pmd_t pmd)
+static inline bool pmd_user_accessible_page(struct mm_struct *mm, unsigned long addr, pmd_t pmd)
{
return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}
-static inline bool pud_user_accessible_page(pud_t pud)
+static inline bool pud_user_accessible_page(struct mm_struct *mm, unsigned long addr, pud_t pud)
{
return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud));
}
@@ -1227,9 +1295,15 @@ static inline bool pud_user_accessible_page(pud_t pud)
/*
* Atomic pte/pmd modifications.
*/
-static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address,
- pte_t *ptep)
+
+static inline void __pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ __set_pte(ptep, __pte(0));
+}
+
+static inline bool __ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
{
pte_t old_pte, pte;
@@ -1244,10 +1318,10 @@ static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
return pte_young(pte);
}
-static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+static inline bool __ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
{
- int young = __ptep_test_and_clear_young(vma, address, ptep);
+ bool young = __ptep_test_and_clear_young(vma, address, ptep);
if (young) {
/*
@@ -1258,7 +1332,7 @@ static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
* context-switch, which provides a DSB to complete the TLB
* invalidation.
*/
- flush_tlb_page_nosync(vma, address);
+ __flush_tlb_page(vma, address, TLBF_NOSYNC);
}
return young;
@@ -1266,9 +1340,8 @@ static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address,
- pmd_t *pmdp)
+static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
{
/* Operation applies to PMD table entry only if FEAT_HAFT is enabled */
VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft());
@@ -1276,16 +1349,38 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
-static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm,
+ unsigned long address,
+ pte_t *ptep,
+ unsigned long pgsize)
{
pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
- page_table_check_pte_clear(mm, pte);
+ switch (pgsize) {
+ case PAGE_SIZE:
+ page_table_check_pte_clear(mm, address, pte);
+ break;
+ case PMD_SIZE:
+ page_table_check_pmd_clear(mm, address, pte_pmd(pte));
+ break;
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ page_table_check_pud_clear(mm, address, pte_pud(pte));
+ break;
+#endif
+ default:
+ VM_WARN_ON(1);
+ }
return pte;
}
+static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ return __ptep_get_and_clear_anysz(mm, address, ptep, PAGE_SIZE);
+}
+
static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned int nr, int full)
{
@@ -1322,11 +1417,7 @@ static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
-
- page_table_check_pmd_clear(mm, pmd);
-
- return pmd;
+ return pte_pmd(__ptep_get_and_clear_anysz(mm, address, (pte_t *)pmdp, PMD_SIZE));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -1415,7 +1506,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
- page_table_check_pmd_set(vma->vm_mm, pmdp, pmd);
+ page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
}
#endif
@@ -1519,6 +1610,14 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
*/
#define arch_wants_old_prefaulted_pte cpu_has_hw_af
+/*
+ * Request exec memory is read into pagecache in at least 64K folios. This size
+ * can be contpte-mapped when 4K base pages are in use (16 pages into 1 iTLB
+ * entry), and HPA can coalesce it (4 pages into 1 TLB entry) when 16K base
+ * pages are in use.
+ */
+#define exec_folio_order() ilog2(SZ_64K >> PAGE_SHIFT)
+
static inline bool pud_sect_supported(void)
{
return PAGE_SIZE == SZ_4K;
@@ -1535,6 +1634,16 @@ extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t new_pte);
+#define modify_prot_start_ptes modify_prot_start_ptes
+extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr);
+
+#define modify_prot_commit_ptes modify_prot_commit_ptes
+extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte,
+ unsigned int nr);
+
#ifdef CONFIG_ARM64_CONTPTE
/*
@@ -1555,10 +1664,10 @@ extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
unsigned long addr, pte_t *ptep,
unsigned int nr, int full);
-extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep);
-extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep);
+bool contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, unsigned int nr);
+bool contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, unsigned int nr);
extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned int nr);
extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
@@ -1721,28 +1830,43 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
return __ptep_get_and_clear(mm, addr, ptep);
}
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+#define test_and_clear_young_ptes test_and_clear_young_ptes
+static inline bool test_and_clear_young_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, unsigned int nr)
{
- pte_t orig_pte = __ptep_get(ptep);
-
- if (likely(!pte_valid_cont(orig_pte)))
+ if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
return __ptep_test_and_clear_young(vma, addr, ptep);
- return contpte_ptep_test_and_clear_young(vma, addr, ptep);
+ return contpte_test_and_clear_young_ptes(vma, addr, ptep, nr);
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ return test_and_clear_young_ptes(vma, addr, ptep, 1);
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+static inline bool ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
{
pte_t orig_pte = __ptep_get(ptep);
if (likely(!pte_valid_cont(orig_pte)))
return __ptep_clear_flush_young(vma, addr, ptep);
- return contpte_ptep_clear_flush_young(vma, addr, ptep);
+ return contpte_clear_flush_young_ptes(vma, addr, ptep, 1);
+}
+
+#define clear_flush_young_ptes clear_flush_young_ptes
+static inline bool clear_flush_young_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+ if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
+ return __ptep_clear_flush_young(vma, addr, ptep);
+
+ return contpte_clear_flush_young_ptes(vma, addr, ptep, nr);
}
#define wrprotect_ptes wrprotect_ptes
@@ -1825,6 +1949,6 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
#endif /* CONFIG_ARM64_CONTPTE */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_PGTABLE_H */