diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-10 17:13:07 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-10 17:13:07 -0700 |
commit | 70442fc54e6889a2a77f0e9554e8188a1557f00e (patch) | |
tree | 24856824866827a50c3f37b98bf1cf25cd89ff47 /arch | |
parent | e2302539dd4f1c62d96651c07ddb05aa2461d29c (diff) | |
parent | c5129ecc12a3101555d8922b1e0aa90f91247ab6 (diff) | |
download | lwn-70442fc54e6889a2a77f0e9554e8188a1557f00e.tar.gz lwn-70442fc54e6889a2a77f0e9554e8188a1557f00e.zip |
Merge tag 'x86_mm_for_v6.1_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Dave Hansen:
"There are some small things here, plus one big one.
The big one detected and refused to create W+X kernel mappings. This
caused a bit of trouble and it is entirely disabled on 32-bit due to
known unfixable EFI issues. It also oopsed on some systemd eBPF use,
which kept some users from booting.
The eBPF issue is fixed, but those troubles were caught relatively
recently which made me nervous that there are more lurking. The final
commit in here retains the warnings, but doesn't actually refuse to
create W+X mappings.
Summary:
- Detect insecure W+X mappings and warn about them, including a few
bug fixes and relaxing the enforcement
- Do a long-overdue defconfig update and enabling W+X boot-time
detection
- Cleanup _PAGE_PSE handling (follow-up on an earlier bug)
- Rename a change_page_attr function"
* tag 'x86_mm_for_v6.1_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mm: Ease W^X enforcement back to just a warning
x86/mm: Disable W^X detection and enforcement on 32-bit
x86/mm: Add prot_sethuge() helper to abstract out _PAGE_PSE handling
x86/mm/32: Fix W^X detection when page tables do not support NX
x86/defconfig: Enable CONFIG_DEBUG_WX=y
x86/defconfig: Refresh the defconfigs
x86/mm: Refuse W^X violations
x86/mm: Rename set_memory_present() to set_memory_p()
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/configs/i386_defconfig | 6 | ||||
-rw-r--r-- | arch/x86/configs/x86_64_defconfig | 5 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 19 | ||||
-rw-r--r-- | arch/x86/mm/pat/set_memory.c | 49 |
4 files changed, 64 insertions, 15 deletions
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 7207219509f6..3cf34912abfe 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -27,7 +27,6 @@ CONFIG_CGROUP_MISC=y CONFIG_CGROUP_DEBUG=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y -# CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_SMP=y CONFIG_HYPERVISOR_GUEST=y @@ -44,6 +43,7 @@ CONFIG_EFI_STUB=y CONFIG_HZ_1000=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y +# CONFIG_RETHUNK is not set CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y CONFIG_PM_TRACE_RTC=y @@ -62,6 +62,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BINFMT_MISC=y +# CONFIG_COMPAT_BRK is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -269,9 +270,10 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_KERNEL=y CONFIG_FRAME_WARN=1024 CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_WX=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 5ce67b73e218..27759236fd60 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -26,7 +26,6 @@ CONFIG_CGROUP_MISC=y CONFIG_CGROUP_DEBUG=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y -# CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_SMP=y CONFIG_HYPERVISOR_GUEST=y @@ -62,6 +61,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BINFMT_MISC=y +# CONFIG_COMPAT_BRK is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -267,8 +267,9 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_PRINTK_TIME=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_WX=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0fe690ebc269..7ea7d4745681 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -90,6 +90,12 @@ DEFINE_ENTRY(pud, pud, init) DEFINE_ENTRY(pmd, pmd, init) DEFINE_ENTRY(pte, pte, init) +static inline pgprot_t prot_sethuge(pgprot_t prot) +{ + WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PAT); + + return __pgprot(pgprot_val(prot) | _PAGE_PSE); +} /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the @@ -557,9 +563,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, if (page_size_mask & (1<<PG_LEVEL_2M)) { pages++; spin_lock(&init_mm.page_table_lock); - set_pte_init((pte_t *)pmd, - pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, - __pgprot(pgprot_val(prot) | _PAGE_PSE)), + set_pmd_init(pmd, + pfn_pmd(paddr >> PAGE_SHIFT, prot_sethuge(prot)), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; @@ -644,12 +649,8 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, if (page_size_mask & (1<<PG_LEVEL_1G)) { pages++; spin_lock(&init_mm.page_table_lock); - - prot = __pgprot(pgprot_val(prot) | _PAGE_PSE); - - set_pte_init((pte_t *)pud, - pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, - prot), + set_pud_init(pud, + pfn_pud(paddr >> PAGE_SHIFT, prot_sethuge(prot)), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 1abd5438f126..97342c42dda8 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -580,6 +580,46 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, } /* + * Validate strict W^X semantics. + */ +static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start, + unsigned long pfn, unsigned long npg) +{ + unsigned long end; + + /* + * 32-bit has some unfixable W+X issues, like EFI code + * and writeable data being in the same page. Disable + * detection and enforcement there. + */ + if (IS_ENABLED(CONFIG_X86_32)) + return new; + + /* Only verify when NX is supported: */ + if (!(__supported_pte_mask & _PAGE_NX)) + return new; + + if (!((pgprot_val(old) ^ pgprot_val(new)) & (_PAGE_RW | _PAGE_NX))) + return new; + + if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW) + return new; + + end = start + npg * PAGE_SIZE - 1; + WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n", + (unsigned long long)pgprot_val(old), + (unsigned long long)pgprot_val(new), + start, end, pfn); + + /* + * For now, allow all permission change attempts by returning the + * attempted permissions. This can 'return old' to actively + * refuse the permission change at a later time. + */ + return new; +} + +/* * Lookup the page table entry for a virtual address in a specific pgd. * Return a pointer to the entry and the level of the mapping. */ @@ -885,6 +925,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address, new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages, psize, CPA_DETECT); + new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages); + /* * If there is a conflict, split the large page. * @@ -1525,6 +1567,7 @@ repeat: if (level == PG_LEVEL_4K) { pte_t new_pte; + pgprot_t old_prot = pte_pgprot(old_pte); pgprot_t new_prot = pte_pgprot(old_pte); unsigned long pfn = pte_pfn(old_pte); @@ -1536,6 +1579,8 @@ repeat: new_prot = static_protections(new_prot, address, pfn, 1, 0, CPA_PROTECT); + new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1); + new_prot = pgprot_clear_protnone_bits(new_prot); /* @@ -1944,7 +1989,7 @@ int set_mce_nospec(unsigned long pfn) return rc; } -static int set_memory_present(unsigned long *addr, int numpages) +static int set_memory_p(unsigned long *addr, int numpages) { return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0); } @@ -1954,7 +1999,7 @@ int clear_mce_nospec(unsigned long pfn) { unsigned long addr = (unsigned long) pfn_to_kaddr(pfn); - return set_memory_present(&addr, 1); + return set_memory_p(&addr, 1); } EXPORT_SYMBOL_GPL(clear_mce_nospec); #endif /* CONFIG_X86_64 */ |