diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-09 09:54:46 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-09 09:54:46 -0700 |
commit | a5ad5742f671de906adbf29fbedf0a04705cebad (patch) | |
tree | 88d1a4c18e2025a5a8335dbbc9dea8bebeba5789 /arch/x86 | |
parent | 013b2deba9a6b80ca02f4fafd7dedf875e9b4450 (diff) | |
parent | 4fa7252338a56fbc90220e6330f136a379175a7a (diff) | |
download | lwn-a5ad5742f671de906adbf29fbedf0a04705cebad.tar.gz lwn-a5ad5742f671de906adbf29fbedf0a04705cebad.zip |
Merge branch 'akpm' (patches from Andrew)
Merge even more updates from Andrew Morton:
- a kernel-wide sweep of show_stack()
- pagetable cleanups
- abstract out accesses to mmap_sem - prep for mmap_sem scalability work
- hch's user acess work
Subsystems affected by this patch series: debug, mm/pagemap, mm/maccess,
mm/documentation.
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (93 commits)
include/linux/cache.h: expand documentation over __read_mostly
maccess: return -ERANGE when probe_kernel_read() fails
x86: use non-set_fs based maccess routines
maccess: allow architectures to provide kernel probing directly
maccess: move user access routines together
maccess: always use strict semantics for probe_kernel_read
maccess: remove strncpy_from_unsafe
tracing/kprobes: handle mixed kernel/userspace probes better
bpf: rework the compat kernel probe handling
bpf:bpf_seq_printf(): handle potentially unsafe format string better
bpf: handle the compat string in bpf_trace_copy_string better
bpf: factor out a bpf_trace_copy_string helper
maccess: unify the probe kernel arch hooks
maccess: remove probe_read_common and probe_write_common
maccess: rename strnlen_unsafe_user to strnlen_user_nofault
maccess: rename strncpy_from_unsafe_strict to strncpy_from_kernel_nofault
maccess: rename strncpy_from_unsafe_user to strncpy_from_user_nofault
maccess: update the top of file comment
maccess: clarify kerneldoc comments
maccess: remove duplicate kerneldoc comments
...
Diffstat (limited to 'arch/x86')
86 files changed, 153 insertions, 291 deletions
diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c index 9557c5a15b91..f9c5c13d979b 100644 --- a/arch/x86/boot/compressed/kaslr_64.c +++ b/arch/x86/boot/compressed/kaslr_64.c @@ -22,8 +22,8 @@ #include "misc.h" /* These actually do the work of building the kernel identity maps. */ +#include <linux/pgtable.h> #include <asm/init.h> -#include <asm/pgtable.h> /* Use the static base for this part of the boot process */ #undef __PAGE_OFFSET #define __PAGE_OFFSET __PAGE_OFFSET_BASE diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 43428cc514c8..ea7c1f0b79df 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -144,7 +144,7 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) struct mm_struct *mm = task->mm; struct vm_area_struct *vma; - if (down_write_killable(&mm->mmap_sem)) + if (mmap_write_lock_killable(mm)) return -EINTR; for (vma = mm->mmap; vma; vma = vma->vm_next) { @@ -154,7 +154,7 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) zap_page_range(vma, vma->vm_start, size); } - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return 0; } #else @@ -268,7 +268,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) unsigned long text_start; int ret = 0; - if (down_write_killable(&mm->mmap_sem)) + if (mmap_write_lock_killable(mm)) return -EINTR; addr = get_unmapped_area(NULL, addr, @@ -311,7 +311,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) } up_fail: - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return ret; } @@ -373,7 +373,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr) struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - down_write(&mm->mmap_sem); + mmap_write_lock(mm); /* * Check if we have already mapped vdso blob - fail to prevent * abusing from userspace install_speciall_mapping, which may @@ -384,11 +384,11 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr) for (vma = mm->mmap; vma; vma = vma->vm_next) { if (vma_is_special_mapping(vma, &vdso_mapping) || vma_is_special_mapping(vma, &vvar_mapping)) { - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return -EEXIST; } } - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return map_vdso(image, addr); } diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index ea2a3d151294..4103665c6e03 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2178,10 +2178,10 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) * userspace with CR4.PCE clear while another task is still * doing on_each_cpu_mask() to propagate CR4.PCE. * - * For now, this can't happen because all callers hold mmap_sem + * For now, this can't happen because all callers hold mmap_lock * for write. If this changes, we'll need a different solution. */ - lockdep_assert_held_write(&mm->mmap_sem); + mmap_assert_write_locked(mm); if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1); diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h index 8e25bf4f323a..62da760d6d5a 100644 --- a/arch/x86/include/asm/agp.h +++ b/arch/x86/include/asm/agp.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_AGP_H #define _ASM_X86_AGP_H -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include <asm/cacheflush.h> /* diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 9bf2620ce817..5a42f9206138 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <asm/ftrace.h> #include <linux/uaccess.h> +#include <linux/pgtable.h> #include <asm/string.h> #include <asm/page.h> #include <asm/checksum.h> #include <asm-generic/asm-prototypes.h> -#include <asm/pgtable.h> #include <asm/special_insns.h> #include <asm/preempt.h> #include <asm/asm.h> diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 89dcc7aa7e2c..e7d2ccfdd507 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,13 +3,13 @@ #define _ASM_X86_EFI_H #include <asm/fpu/api.h> -#include <asm/pgtable.h> #include <asm/processor-flags.h> #include <asm/tlb.h> #include <asm/nospec-branch.h> #include <asm/mmu_context.h> #include <linux/build_bug.h> #include <linux/kernel.h> +#include <linux/pgtable.h> extern unsigned long efi_fw_vendor, efi_config_table; diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 2a7b3211ee7a..bacf68c4d70e 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -10,7 +10,6 @@ #include <linux/mm.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> void __iomem * diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index db7ba2feb947..0648190467ba 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -6,8 +6,10 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY void kernel_randomize_memory(void); +void init_trampoline_kaslr(void); #else static inline void kernel_randomize_memory(void) { } +static inline void init_trampoline_kaslr(void) {} #endif /* CONFIG_RANDOMIZE_MEMORY */ #endif diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index bdeae9291e5c..0a301ad0b02f 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -45,7 +45,7 @@ typedef struct { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS /* * One bit per protection key says whether userspace can - * use it or not. protected by mmap_sem. + * use it or not. protected by mmap_lock. */ u16 pkey_allocation_map; s16 execute_only_pkey; diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 5afb5e0fe903..e896ebef8c24 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -39,23 +39,23 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with * a "*pmdp" dereference done by GCC. Problem is, in certain places * where pte_offset_map_lock() is called, concurrent page faults are - * allowed, if the mmap_sem is hold for reading. An example is mincore + * allowed, if the mmap_lock is hold for reading. An example is mincore * vs page faults vs MADV_DONTNEED. On the page fault side * pmd_populate() rightfully does a set_64bit(), but if we're reading the * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen * because GCC will not read the 64-bit value of the pmd atomically. * * To fix this all places running pte_offset_map_lock() while holding the - * mmap_sem in read mode, shall read the pmdp pointer using this + * mmap_lock in read mode, shall read the pmdp pointer using this * function to know if the pmd is null or not, and in turn to know if * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd * operations. * - * Without THP if the mmap_sem is held for reading, the pmd can only + * Without THP if the mmap_lock is held for reading, the pmd can only * transition from null to not null while pmd_read_atomic() runs. So * we can always return atomic pmd values with this function. * - * With THP if the mmap_sem is held for reading, the pmd can become + * With THP if the mmap_lock is held for reading, the pmd can become * trans_huge or none or point to a pte (and in turn become "stable") * at any time under pmd_read_atomic(). We could read it truly * atomically here with an atomic64_read() for the THP enabled case (and diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index b8f46bbe69f4..76aa21e8128d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -802,7 +802,7 @@ static inline int pmd_present(pmd_t pmd) #ifdef CONFIG_NUMA_BALANCING /* * These work without NUMA balancing but the kernel does not care. See the - * comment in include/asm-generic/pgtable.h + * comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { @@ -837,17 +837,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* - * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] - * - * this macro returns the index of the entry in the pmd page which would - * control the given virtual address - */ -static inline unsigned long pmd_index(unsigned long address) -{ - return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); -} - -/* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. * @@ -856,25 +845,6 @@ static inline unsigned long pmd_index(unsigned long address) */ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) -/* - * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] - * - * this function returns the index of the entry in the pte page which would - * control the given virtual address - * - * Also define macro so we can test if pte_index is defined for arch. - */ -#define pte_index pte_index -static inline unsigned long pte_index(unsigned long address) -{ - return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); -} - -static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) -{ - return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); -} - static inline int pmd_bad(pmd_t pmd) { return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; @@ -907,12 +877,6 @@ static inline unsigned long pud_page_vaddr(pud_t pud) */ #define pud_page(pud) pfn_to_page(pud_pfn(pud)) -/* Find an entry in the second-level page table.. */ -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); -} - #define pud_leaf pud_large static inline int pud_large(pud_t pud) { @@ -932,11 +896,6 @@ static inline int pud_large(pud_t pud) } #endif /* CONFIG_PGTABLE_LEVELS > 2 */ -static inline unsigned long pud_index(unsigned long address) -{ - return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); -} - #if CONFIG_PGTABLE_LEVELS > 3 static inline int p4d_none(p4d_t p4d) { @@ -959,12 +918,6 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) */ #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) -/* Find an entry in the third-level page table.. */ -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) -{ - return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); -} - static inline int p4d_bad(p4d_t p4d) { unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; @@ -1037,30 +990,6 @@ static inline int pgd_none(pgd_t pgd) #endif /* __ASSEMBLY__ */ -/* - * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] - * - * this macro returns the index of the entry in the pgd page which would - * control the given virtual address - */ -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) - -/* - * pgd_offset() returns a (pgd_t *) - * pgd_index() is used get the offset into the pgd page's array of pgd_t's; - */ -#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) -/* - * a shortcut to get a pgd_t in a given mm - */ -#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) -/* - * a shortcut which implies the use of the kernel's pgd, instead - * of a process's - */ -#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) - - #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) @@ -1071,27 +1000,14 @@ void init_mem_mapping(void); void early_alloc_pgt_buf(void); extern void memblock_find_dma_reserve(void); + #ifdef CONFIG_X86_64 -/* Realmode trampoline initialization. */ extern pgd_t trampoline_pgd_entry; -static inline void __meminit init_trampoline_default(void) -{ - /* Default trampoline pgd value */ - trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; -} void __init poking_init(void); unsigned long init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot); - -# ifdef CONFIG_RANDOMIZE_MEMORY -void __meminit init_trampoline(void); -# else -# define init_trampoline init_trampoline_default -# endif -#else -static inline void init_trampoline(void) { } #endif /* local pte updates need not use xchg for locking */ @@ -1546,7 +1462,6 @@ static inline bool arch_faults_on_old_pte(void) return false; } -#include <asm-generic/pgtable.h> #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_H */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 82dc0d8464fa..ef76a04b4daf 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -45,17 +45,6 @@ void sync_initial_page_table(void); # include <asm/pgtable-2level.h> #endif -#if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \ - pte_index((address))) -#define pte_unmap(pte) kunmap_atomic((pte)) -#else -#define pte_offset_map(dir, address) \ - ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address))) -#define pte_unmap(pte) do { } while (0) -#endif - /* Clear a kernel PTE and flush it from the TLB */ #define kpte_clear_flush(ptep, vaddr) \ do { \ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 8d03ffd43794..1b68d24dc6a0 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -186,10 +186,6 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); /* PTE - Level 1 access. */ -/* x86-64 always has all page tables mapped. */ -#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) -#define pte_unmap(pte) ((void)(pte))/* NOP */ - /* * Encode and de-code a swap entry * diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ed8ec011a9fd..84b645cc8bc9 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -75,7 +75,17 @@ extern char _text[]; static inline bool kaslr_enabled(void) { - return !!(boot_params.hdr.loadflags & KASLR_FLAG); + return IS_ENABLED(CONFIG_RANDOMIZE_MEMORY) && + !!(boot_params.hdr.loadflags & KASLR_FLAG); +} + +/* + * Apply no randomization if KASLR was disabled at boot or if KASAN + * is enabled. KASAN shadow mappings rely on regions being PGD aligned. + */ +static inline bool kaslr_memory_enabled(void) +{ + return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); } static inline unsigned long kaslr_offset(void) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 14db05086bbf..5ae5a68e469d 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -87,7 +87,7 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) } void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl); + unsigned long *stack, const char *log_lvl); /* The form of the top of the frame on the stack */ struct stack_frame { diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d8f283b9a569..35b23b0311f5 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -523,5 +523,21 @@ do { \ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ } while (0) +#define HAVE_GET_KERNEL_NOFAULT + +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + int __kr_err; \ + \ + __get_user_size(*((type *)dst), (__force type __user *)src, \ + sizeof(type), __kr_err); \ + if (unlikely(__kr_err)) \ + goto err_label; \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, err_label) \ + __put_user_size(*((type *)(src)), (__force type __user *)(dst), \ + sizeof(type), err_label) + #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index d50c7b747d8b..ba4c1b15908b 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -38,11 +38,11 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/pgtable.h> #include <trace/events/xen.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/smap.h> #include <asm/nospec-branch.h> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 790ce08e41f2..5941e18edd5a 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -11,7 +11,6 @@ #include <asm/extable.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <xen/interface/xen.h> #include <xen/interface/grant_table.h> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 683ed9e12e6b..7bdc0239a943 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -20,11 +20,11 @@ #include <linux/pci.h> #include <linux/efi-bgrt.h> #include <linux/serial_core.h> +#include <linux/pgtable.h> #include <asm/e820/api.h> #include <asm/irqdomain.h> #include <asm/pci_x86.h> -#include <asm/pgtable.h> #include <asm/io_apic.h> #include <asm/apic.h> #include <asm/io.h> diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ed3b04483972..cc1fea76aab0 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -10,9 +10,9 @@ #include <linux/memblock.h> #include <linux/dmi.h> #include <linux/cpumask.h> +#include <linux/pgtable.h> #include <asm/segment.h> #include <asm/desc.h> -#include <asm/pgtable.h> #include <asm/cacheflush.h> #include <asm/realmode.h> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index cd617979b7fc..a9195ce8265d 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -18,7 +18,6 @@ #include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/sections.h> -#include <asm/pgtable.h> #include <asm/mce.h> #include <asm/nmi.h> #include <asm/cacheflush.h> diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 16133819415c..17cb5b933dcf 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -33,7 +33,6 @@ #include <linux/atomic.h> #include <linux/dma-direct.h> #include <asm/mtrr.h> -#include <asm/pgtable.h> #include <asm/proto.h> #include <asm/iommu.h> #include <asm/gart.h> @@ -159,7 +158,7 @@ static void dump_leak(void) return; dump = 1; - show_stack(NULL, NULL); + show_stack(NULL, NULL, KERN_ERR); debug_dma_dump_mappings(NULL); } #endif diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index cdf45b4700f2..35edd57f064a 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -12,11 +12,11 @@ */ #include <linux/types.h> #include <linux/init.h> +#include <linux/pgtable.h> #include <asm/numachip/numachip.h> #include <asm/numachip/numachip_csr.h> -#include <asm/pgtable.h> #include "local.h" diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 56978cb06149..b6f887be440c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -15,6 +15,7 @@ #include <linux/nospec.h> #include <linux/prctl.h> #include <linux/sched/smt.h> +#include <linux/pgtable.h> #include <asm/spec-ctrl.h> #include <asm/cmdline.h> @@ -26,7 +27,6 @@ #include <asm/vmx.h> #include <asm/paravirt.h> #include <asm/alternative.h> -#include <asm/pgtable.h> #include <asm/set_memory.h> #include <asm/intel-family.h> #include <asm/e820/api.h> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 40776ef469c9..8be042df12c3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,6 +21,7 @@ #include <linux/smp.h> #include <linux/io.h> #include <linux/syscore_ops.h> +#include <linux/pgtable.h> #include <asm/stackprotector.h> #include <asm/perf_event.h> @@ -35,7 +36,6 @@ #include <asm/vsyscall.h> #include <linux/topology.h> #include <linux/cpumask.h> -#include <asm/pgtable.h> #include <linux/atomic.h> #include <asm/proto.h> #include <asm/setup.h> diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 166d7c355896..63926c94eb5f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> +#include <linux/pgtable.h> #include <linux/string.h> #include <linux/bitops.h> @@ -11,7 +12,6 @@ #include <linux/uaccess.h> #include <asm/cpufeature.h> -#include <asm/pgtable.h> #include <asm/msr.h> #include <asm/bugs.h> #include <asm/cpu.h> diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 4bd28b388a1a..0daf2f1cf7a8 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -1326,9 +1326,9 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) * pseudo-locked region will still be here on return. * * The mutex has to be released temporarily to avoid a potential - * deadlock with the mm->mmap_sem semaphore which is obtained in - * the device_create() and debugfs_create_dir() callpath below - * as well as before the mmap() callback is called. + * deadlock with the mm->mmap_lock which is obtained in the + * device_create() and debugfs_create_dir() callpath below as well as + * before the mmap() callback is called. */ mutex_unlock(&rdtgroup_mutex); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index d7cb5ab0d1f0..23b4b61319d3 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -3199,10 +3199,10 @@ int __init rdtgroup_init(void) * during the debugfs directory creation also &sb->s_type->i_mutex_key * (the lockdep class of inode->i_rwsem). Other filesystem * interactions (eg. SyS_getdents) have the lock ordering: - * &sb->s_type->i_mutex_key --> &mm->mmap_sem - * During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex + * &sb->s_type->i_mutex_key --> &mm->mmap_lock + * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex * is taken, thus creating dependency: - * &mm->mmap_sem --> rdtgroup_mutex for the latter that can cause + * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause * issues considering the other two lock dependencies. * By creating the debugfs directory here we avoid a dependency * that may cause deadlock (even though file operations cannot diff --git a/arch/x86/kernel/crash_core_32.c b/arch/x86/kernel/crash_core_32.c index c0159a7bca6d..8a89c109e20a 100644 --- a/arch/x86/kernel/crash_core_32.c +++ b/arch/x86/kernel/crash_core_32.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/crash_core.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/setup.h> void arch_crash_save_vmcoreinfo(void) diff --git a/arch/x86/kernel/crash_core_64.c b/arch/x86/kernel/crash_core_64.c index 845a57eb4eb7..7d255f882afe 100644 --- a/arch/x86/kernel/crash_core_64.c +++ b/arch/x86/kernel/crash_core_64.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/crash_core.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/setup.h> void arch_crash_save_vmcoreinfo(void) diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 3793646f0fb5..2ccc57f152a4 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -6,7 +6,6 @@ #include <linux/fs.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/processor.h> #include <asm/desc.h> #include <asm/traps.h> diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ae64ec7f752f..456511b2284e 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -65,7 +65,7 @@ bool in_entry_stack(unsigned long *stack, struct stack_info *info) } static void printk_stack_address(unsigned long address, int reliable, - char *log_lvl) + const char *log_lvl) { touch_nmi_watchdog(); printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); @@ -160,7 +160,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, } void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl) + unsigned long *stack, const char *log_lvl) { struct unwind_state state; struct stack_info stack_info = {0}; @@ -279,7 +279,8 @@ next: } } -void show_stack(struct task_struct *task, unsigned long *sp) +void show_stack(struct task_struct *task, unsigned long *sp, + const char *loglvl) { task = task ? : current; @@ -290,7 +291,7 @@ void show_stack(struct task_struct *task, unsigned long *sp) if (!sp && task == current) sp = get_stack_pointer(current, NULL); - show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT); + show_trace_log_lvl(task, NULL, sp, loglvl); } void show_stack_regs(struct pt_regs *regs) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 93fbdff2974f..d3c531d3b244 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -8,6 +8,7 @@ #include <linux/pci_regs.h> #include <linux/pci_ids.h> #include <linux/errno.h> +#include <linux/pgtable.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/fcntl.h> @@ -15,7 +16,6 @@ #include <xen/hvc-console.h> #include <asm/pci-direct.h> #include <asm/fixmap.h> -#include <asm/pgtable.h> #include <linux/usb/ehci_def.h> #include <linux/usb/xhci-dbgp.h> #include <asm/pci_x86.h> diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 12e7d4406c32..4fe7af58cfe1 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -29,7 +29,7 @@ #include <linux/percpu.h> #include <linux/gfp.h> #include <linux/random.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include <asm/pgalloc.h> #include <asm/setup.h> #include <asm/espfix.h> diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 206a4b6144c2..cbb71c1b574f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -20,13 +20,13 @@ #include <linux/io.h> #include <linux/memblock.h> #include <linux/mem_encrypt.h> +#include <linux/pgtable.h> #include <asm/processor.h> #include <asm/proto.h> #include <asm/smp.h> #include <asm/setup.h> #include <asm/desc.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/kdebug.h> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 4bbc770af632..4fc33fdf0f16 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -13,8 +13,8 @@ #include <linux/linkage.h> #include <linux/threads.h> #include <linux/init.h> +#include <linux/pgtable.h> #include <asm/segment.h> -#include <asm/pgtable.h> #include <asm/page.h> #include <asm/msr.h> #include <asm/cache.h> diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 519649ddf100..f3c76252247d 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -15,11 +15,11 @@ #include <linux/acpi.h> #include <linux/io.h> #include <linux/delay.h> +#include <linux/pgtable.h> #include <linux/atomic.h> #include <asm/timer.h> #include <asm/hw_irq.h> -#include <asm/pgtable.h> #include <asm/desc.h> #include <asm/apic.h> #include <asm/i8259.h> diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 5aa523c2d573..dd73135d7cee 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -16,11 +16,11 @@ #include <linux/acpi.h> #include <linux/io.h> #include <linux/delay.h> +#include <linux/pgtable.h> #include <linux/atomic.h> #include <asm/timer.h> #include <asm/hw_irq.h> -#include <asm/pgtable.h> #include <asm/desc.h> #include <asm/apic.h> #include <asm/setup.h> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4d7022a740ab..85de8fa69b24 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -41,11 +41,11 @@ #include <linux/kasan.h> #include <linux/moduleloader.h> #include <linux/vmalloc.h> +#include <linux/pgtable.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> #include <asm/desc.h> -#include <asm/pgtable.h> #include <linux/uaccess.h> #include <asm/alternative.h> #include <asm/insn.h> diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index ea13f6888284..234f58e0fe8c 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -16,11 +16,11 @@ #include <linux/kallsyms.h> #include <linux/ftrace.h> #include <linux/frame.h> +#include <linux/pgtable.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> #include <asm/desc.h> -#include <asm/pgtable.h> #include <linux/uaccess.h> #include <asm/alternative.h> #include <asm/insn.h> diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 84c3ba32f211..8748321c4486 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -8,7 +8,7 @@ * * Lock order: * contex.ldt_usr_sem - * mmap_sem + * mmap_lock * context.lock */ diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 02bddfc122a4..64b00b0d7fe8 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -13,7 +13,6 @@ #include <linux/gfp.h> #include <linux/io.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index ad5cdd6a5f23..a29a44a98e5b 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -19,7 +19,6 @@ #include <linux/efi.h> #include <asm/init.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/io_apic.h> diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 23c95a53d20e..34b153cbd4ac 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -22,7 +22,6 @@ #include <asm/text-patching.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/setup.h> #include <asm/unwind.h> diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5638e4ae2ea6..674a7d66d960 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -13,13 +13,13 @@ #include <linux/bcd.h> #include <linux/highmem.h> #include <linux/kprobes.h> +#include <linux/pgtable.h> #include <asm/bug.h> #include <asm/paravirt.h> #include <asm/debugreg.h> #include <asm/desc.h> #include <asm/setup.h> -#include <asm/pgtable.h> #include <asm/time.h> #include <asm/pgalloc.h> #include <asm/irq.h> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 538d4e8d6589..acfd6d2a0cbf 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -39,7 +39,6 @@ #include <linux/kdebug.h> #include <linux/syscalls.h> -#include <asm/pgtable.h> #include <asm/ldt.h> #include <asm/processor.h> #include <asm/fpu/internal.h> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0c169a5687e1..9a97415b2139 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -40,7 +40,6 @@ #include <linux/ftrace.h> #include <linux/syscalls.h> -#include <asm/pgtable.h> #include <asm/processor.h> #include <asm/fpu/internal.h> #include <asm/mmu_context.h> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f0e1ddbc2fd7..44130588987f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -28,7 +28,6 @@ #include <linux/nospec.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/processor.h> #include <asm/fpu/internal.h> #include <asm/fpu/signal.h> diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 3ca43be4f9cf..e040ba6be27b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -11,13 +11,13 @@ #include <linux/tboot.h> #include <linux/delay.h> #include <linux/frame.h> +#include <linux/pgtable.h> #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/desc.h> #include <asm/hpet.h> -#include <asm/pgtable.h> #include <asm/proto.h> #include <asm/reboot_fixups.h> #include <asm/reboot.h> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2467f3dd35d3..ffbd9a3d78d8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -55,6 +55,7 @@ #include <linux/gfp.h> #include <linux/cpuidle.h> #include <linux/numa.h> +#include <linux/pgtable.h> #include <asm/acpi.h> #include <asm/desc.h> @@ -63,7 +64,6 @@ #include <asm/realmode.h> #include <asm/cpu.h> #include <asm/numa.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mtrr.h> #include <asm/mwait.h> diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index b2942b2dbfcf..992fb1415c0f 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -23,7 +23,6 @@ #include <asm/realmode.h> #include <asm/processor.h> #include <asm/bootparam.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/swiotlb.h> #include <asm/fixmap.h> @@ -94,7 +93,7 @@ static struct mm_struct tboot_mm = { .pgd = swapper_pg_dir, .mm_users = ATOMIC_INIT(2), .mm_count = ATOMIC_INIT(1), - .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), + MMAP_LOCK_INITIALIZER(init_mm) .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), }; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 47a8676c7395..764573de3996 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -171,7 +171,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) pte_t *pte; int i; - down_write(&mm->mmap_sem); + mmap_write_lock(mm); pgd = pgd_offset(mm, 0xA0000); if (pgd_none_or_clear_bad(pgd)) goto out; @@ -197,7 +197,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) } pte_unmap_unlock(pte, ptl); out: - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, PAGE_SHIFT, false); } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 38c576495048..a6d484ea110b 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -165,22 +165,22 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long pfn; unsigned long paddr; - down_read(¤t->mm->mmap_sem); + mmap_read_lock(current->mm); vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); if (!vma || !(vma->vm_flags & VM_PFNMAP)) { - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); return -EFAULT; } pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; paddr = pfn << PAGE_SHIFT; table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); if (!table) { - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); return -EFAULT; } ret = CMPXCHG(&table[index], orig_pte, new_pte); memunmap(table); - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); } return (ret != orig_pte); diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 5199d8a1daf1..6f8b48f545f4 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -4,9 +4,9 @@ #include <linux/percpu.h> #include <linux/kallsyms.h> #include <linux/kcore.h> +#include <linux/pgtable.h> #include <asm/cpu_entry_area.h> -#include <asm/pgtable.h> #include <asm/fixmap.h> #include <asm/desc.h> diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index 4a3b62f780b4..092ea436c7e6 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -3,7 +3,7 @@ #include <linux/efi.h> #include <linux/module.h> #include <linux/seq_file.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> static int ptdump_show(struct seq_file *m, void *v) { diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ea9010113f69..e1b599ecbbc2 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -19,7 +19,6 @@ #include <linux/ptdump.h> #include <asm/e820/types.h> -#include <asm/pgtable.h> /* * The dumper groups pagetable entries of the same type into one, and for diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c5437f2964ee..0b03ae8c39cd 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -811,7 +811,7 @@ __bad_area(struct pt_regs *regs, unsigned long error_code, * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); __bad_area_nosemaphore(regs, error_code, address, pkey, si_code); } @@ -865,7 +865,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, * 2. T1 : set PKRU to deny access to pkey=4, touches page * 3. T1 : faults... * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); - * 5. T1 : enters fault handler, takes mmap_sem, etc... + * 5. T1 : enters fault handler, takes mmap_lock, etc... * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. */ @@ -1231,15 +1231,15 @@ void do_user_addr_fault(struct pt_regs *regs, * Kernel-mode access to the user address space should only occur * on well-defined single instructions listed in the exception * tables. But, an erroneous kernel fault occurring outside one of - * those areas which also holds mmap_sem might deadlock attempting + * those areas which also holds mmap_lock might deadlock attempting * to validate the fault against the address space. * * Only do the expensive exception table search when we might be at * risk of a deadlock. This happens if we - * 1. Failed to acquire mmap_sem, and + * 1. Failed to acquire mmap_lock, and * 2. The access did not originate in userspace. */ - if (unlikely(!down_read_trylock(&mm->mmap_sem))) { + if (unlikely(!mmap_read_trylock(mm))) { if (!user_mode(regs) && !search_exception_tables(regs->ip)) { /* * Fault from code in kernel from @@ -1249,7 +1249,7 @@ void do_user_addr_fault(struct pt_regs *regs, return; } retry: - down_read(&mm->mmap_sem); + mmap_read_lock(mm); } else { /* * The above down_read_trylock() might have succeeded in @@ -1289,9 +1289,9 @@ good_area: * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if - * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. + * we get VM_FAULT_RETRY back, the mmap_lock has been unlocked. * - * Note that handle_userfault() may also release and reacquire mmap_sem + * Note that handle_userfault() may also release and reacquire mmap_lock * (and not return with VM_FAULT_RETRY), when returning to userland to * repeat the page fault later with a VM_FAULT_NOPAGE retval * (potentially after handling any pending signal during the return to @@ -1310,7 +1310,7 @@ good_area: } /* - * If we need to retry the mmap_sem has already been released, + * If we need to retry the mmap_lock has already been released, * and if there is a fatal signal pending there is no guarantee * that we made any progress. Handle this case first. */ @@ -1320,7 +1320,7 @@ good_area: goto retry; } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); if (unlikely(fault & VM_FAULT_ERROR)) { mm_fault_error(regs, hw_error_code, address, fault); return; @@ -1359,7 +1359,7 @@ dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, unsigned long address) { - prefetchw(¤t->mm->mmap_sem); + prefetchw(¤t->mm->mmap_lock); /* * KVM has two types of events that are, logically, interrupts, but * are unfortunately delivered using the #PF vector. These events are diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 112d3b98a3b6..001dd7dc829f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -680,6 +680,28 @@ static void __init memory_map_bottom_up(unsigned long map_start, } } +/* + * The real mode trampoline, which is required for bootstrapping CPUs + * occupies only a small area under the low 1MB. See reserve_real_mode() + * for details. + * + * If KASLR is disabled the first PGD entry of the direct mapping is copied + * to map the real mode trampoline. + * + * If KASLR is enabled, copy only the PUD which covers the low 1MB + * area. This limits the randomization granularity to 1GB for both 4-level + * and 5-level paging. + */ +static void __init init_trampoline(void) +{ +#ifdef CONFIG_X86_64 + if (!kaslr_memory_enabled()) + trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; + else + init_trampoline_kaslr(); +#endif +} + void __init init_mem_mapping(void) { unsigned long end; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 4222a010057a..bda909e3e37e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -35,7 +35,6 @@ #include <asm/bios_ebda.h> #include <asm/processor.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/dma.h> #include <asm/fixmap.h> #include <asm/e820/api.h> @@ -396,15 +395,6 @@ repeat: pte_t *kmap_pte; -static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) -{ - pgd_t *pgd = pgd_offset_k(vaddr); - p4d_t *p4d = p4d_offset(pgd, vaddr); - pud_t *pud = pud_offset(p4d, vaddr); - pmd_t *pmd = pmd_offset(pud, vaddr); - return pte_offset_kernel(pmd, vaddr); -} - static void __init kmap_init(void) { unsigned long kmap_vstart; @@ -413,28 +403,17 @@ static void __init kmap_init(void) * Cache the first kmap pte: */ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + kmap_pte = virt_to_kpte(kmap_vstart); } #ifdef CONFIG_HIGHMEM static void __init permanent_kmaps_init(pgd_t *pgd_base) { - unsigned long vaddr; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + unsigned long vaddr = PKMAP_BASE; - vaddr = PKMAP_BASE; page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - pgd = swapper_pg_dir + pgd_index(vaddr); - p4d = p4d_offset(pgd, vaddr); - pud = pud_offset(p4d, vaddr); - pmd = pmd_offset(pud, vaddr); - pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; + pkmap_page_table = virt_to_kpte(vaddr); } void __init add_highpages_with_active_regions(int nid, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index add03c35aa34..dbae185511cd 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -37,7 +37,6 @@ #include <asm/processor.h> #include <asm/bios_ebda.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/dma.h> #include <asm/fixmap.h> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 986d57534fd6..84d85dbd1dad 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -16,12 +16,12 @@ #include <linux/mmiotrace.h> #include <linux/mem_encrypt.h> #include <linux/efi.h> +#include <linux/pgtable.h> #include <asm/set_memory.h> #include <asm/e820/api.h> #include <asm/efi.h> #include <asm/fixmap.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/pgalloc.h> #include <asm/memtype.h> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 763e71abc0fe..1a50434c8a4d 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -17,7 +17,6 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/sections.h> -#include <asm/pgtable.h> #include <asm/cpu_entry_area.h> extern struct range pfn_mapped[E820_MAX_ENTRIES]; diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index dc6182eecefa..fb620fd9dae9 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -24,9 +24,9 @@ #include <linux/init.h> #include <linux/random.h> #include <linux/memblock.h> +#include <linux/pgtable.h> #include <asm/pgalloc.h> -#include <asm/pgtable.h> #include <asm/setup.h> #include <asm/kaslr.h> @@ -61,15 +61,6 @@ static inline unsigned long get_padding(struct kaslr_memory_region *region) return (region->size_tb << TB_SHIFT); } -/* - * Apply no randomization if KASLR was disabled at boot or if KASAN - * is enabled. KASAN shadow mappings rely on regions being PGD aligned. - */ -static inline bool kaslr_memory_enabled(void) -{ - return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); -} - /* Initialize base and padding for each memory region randomized with KASLR */ void __init kernel_randomize_memory(void) { @@ -148,7 +139,7 @@ void __init kernel_randomize_memory(void) } } -static void __meminit init_trampoline_pud(void) +void __meminit init_trampoline_kaslr(void) { pud_t *pud_page_tramp, *pud, *pud_tramp; p4d_t *p4d_page_tramp, *p4d, *p4d_tramp; @@ -189,25 +180,3 @@ static void __meminit init_trampoline_pud(void) __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); } } - -/* - * The real mode trampoline, which is required for bootstrapping CPUs - * occupies only a small area under the low 1MB. See reserve_real_mode() - * for details. - * - * If KASLR is disabled the first PGD entry of the direct mapping is copied - * to map the real mode trampoline. - * - * If KASLR is enabled, copy only the PUD which covers the low 1MB - * area. This limits the randomization granularity to 1GB for both 4-level - * and 5-level paging. - */ -void __meminit init_trampoline(void) -{ - if (!kaslr_memory_enabled()) { - init_trampoline_default(); - return; - } - - init_trampoline_pud(); -} diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index f5b85bdc0535..e1d7d7477c22 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -9,35 +9,21 @@ static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits) return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); } -static __always_inline bool invalid_probe_range(u64 vaddr) +bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) { + unsigned long vaddr = (unsigned long)unsafe_src; + /* * Range covering the highest possible canonical userspace address * as well as non-canonical address range. For the canonical range * we also need to include the userspace guard page. */ - return vaddr < TASK_SIZE_MAX + PAGE_SIZE || - canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr; + return vaddr >= TASK_SIZE_MAX + PAGE_SIZE && + canonical_address(vaddr, boot_cpu_data.x86_virt_bits) == vaddr; } #else -static __always_inline bool invalid_probe_range(u64 vaddr) +bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) { - return vaddr < TASK_SIZE_MAX; + return (unsigned long)unsafe_src >= TASK_SIZE_MAX; } #endif - -long probe_kernel_read_strict(void *dst, const void *src, size_t size) -{ - if (unlikely(invalid_probe_range((unsigned long)src))) - return -EFAULT; - - return __probe_kernel_read(dst, src, size); -} - -long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count) -{ - if (unlikely(invalid_probe_range((unsigned long)unsafe_addr))) - return -EFAULT; - - return __strncpy_from_unsafe(dst, unsafe_addr, count); -} diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 106ead05bbe3..7a84fc8bc5c3 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -8,7 +8,7 @@ */ #include <linux/linkage.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include <asm/page.h> #include <asm/processor-flags.h> #include <asm/msr-index.h> diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 43fd19b3f118..bd7aff5c51f7 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -17,8 +17,8 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/io.h> -#include <asm/pgtable.h> #include <linux/mmiotrace.h> +#include <linux/pgtable.h> #include <asm/e820/api.h> /* for ISA_START_ADDRESS */ #include <linux/atomic.h> #include <linux/percpu.h> diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c index facce271e8b9..0612a73638a8 100644 --- a/arch/x86/mm/pat/cpa-test.c +++ b/arch/x86/mm/pat/cpa-test.c @@ -14,7 +14,6 @@ #include <linux/vmalloc.h> #include <asm/cacheflush.h> -#include <asm/pgtable.h> #include <asm/kdebug.h> /* diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 394be8611748..8f665c352bf0 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -46,7 +46,6 @@ #include <asm/processor.h> #include <asm/tlbflush.h> #include <asm/x86_init.h> -#include <asm/pgtable.h> #include <asm/fcntl.h> #include <asm/e820/api.h> #include <asm/mtrr.h> diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c index a07e4882bf36..645613d59942 100644 --- a/arch/x86/mm/pat/memtype_interval.c +++ b/arch/x86/mm/pat/memtype_interval.c @@ -14,8 +14,8 @@ #include <linux/interval_tree_generic.h> #include <linux/sched.h> #include <linux/gfp.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/memtype.h> #include "memtype.h" diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index d88e9064c28e..dfd82f51ba66 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -3,7 +3,6 @@ #include <linux/gfp.h> #include <linux/hugetlb.h> #include <asm/pgalloc.h> -#include <asm/pgtable.h> #include <asm/tlb.h> #include <asm/fixmap.h> #include <asm/mtrr.h> diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index e1ce59dc558f..1953685c2ddf 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -11,7 +11,6 @@ #include <linux/spinlock.h> #include <asm/cpu_entry_area.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/fixmap.h> #include <asm/e820/api.h> diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index da0fb17a1a36..a3c6757a65c7 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -34,7 +34,6 @@ #include <asm/vsyscall.h> #include <asm/cmdline.h> #include <asm/pti.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/desc.h> diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index adb3c5784dac..ed5667f5169f 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -2,8 +2,8 @@ #include <linux/spinlock.h> #include <linux/errno.h> #include <linux/init.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/proto.h> #include <asm/cpufeature.h> diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index c049c432745d..826ead67753d 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -24,11 +24,11 @@ #include <linux/types.h> #include <linux/ioport.h> #include <linux/efi.h> +#include <linux/pgtable.h> #include <asm/io.h> #include <asm/desc.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/efi.h> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index c5e393f8bb3f..8e364c4c6768 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -39,7 +39,6 @@ #include <asm/setup.h> #include <asm/page.h> #include <asm/e820/api.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/proto.h> #include <asm/efi.h> diff --git a/arch/x86/platform/olpc/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c index 20a064568463..6bab0f0aa8f3 100644 --- a/arch/x86/platform/olpc/olpc_ofw.c +++ b/arch/x86/platform/olpc/olpc_ofw.c @@ -3,12 +3,12 @@ #include <linux/export.h> #include <linux/spinlock_types.h> #include <linux/init.h> +#include <linux/pgtable.h> #include <asm/page.h> #include <asm/setup.h> #include <asm/io.h> #include <asm/cpufeature.h> #include <asm/special_insns.h> -#include <asm/pgtable.h> #include <asm/olpc_ofw.h> /* address of OFW callback interface; will be NULL if OFW isn't found */ diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index fc3b757afb2c..7c65102debaf 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -13,8 +13,8 @@ #include <linux/perf_event.h> #include <linux/tboot.h> #include <linux/dmi.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/proto.h> #include <asm/mtrr.h> #include <asm/page.h> diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index fc413717a45f..d147f1b2c925 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -12,6 +12,7 @@ #include <linux/scatterlist.h> #include <linux/kdebug.h> #include <linux/cpu.h> +#include <linux/pgtable.h> #include <crypto/hash.h> @@ -19,7 +20,6 @@ #include <asm/init.h> #include <asm/proto.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/mtrr.h> #include <asm/sections.h> #include <asm/suspend.h> diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index a1061d471b73..223d5bca29b8 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -8,9 +8,9 @@ #include <linux/gfp.h> #include <linux/suspend.h> #include <linux/memblock.h> +#include <linux/pgtable.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/mmzone.h> #include <asm/sections.h> #include <asm/suspend.h> diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 0197095d9637..a595953f1d6d 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -12,6 +12,7 @@ #include <linux/suspend.h> #include <linux/scatterlist.h> #include <linux/kdebug.h> +#include <linux/pgtable.h> #include <crypto/hash.h> @@ -19,7 +20,6 @@ #include <asm/init.h> #include <asm/proto.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/mtrr.h> #include <asm/sections.h> #include <asm/suspend.h> diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 262f83cad355..1ed1208931e0 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -3,9 +3,9 @@ #include <linux/slab.h> #include <linux/memblock.h> #include <linux/mem_encrypt.h> +#include <linux/pgtable.h> #include <asm/set_memory.h> -#include <asm/pgtable.h> #include <asm/realmode.h> #include <asm/tlbflush.h> #include <asm/crash.h> diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index 9e7c4aba6c3a..76d9f6ce7a3d 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c @@ -58,7 +58,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!vdso_enabled) return 0; - if (down_write_killable(&mm->mmap_sem)) + if (mmap_write_lock_killable(mm)) return -EINTR; err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, @@ -66,7 +66,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdsop); - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return err; } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 507f4fb88fa7..c2c97faaf004 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -63,7 +63,6 @@ #include <asm/setup.h> #include <asm/desc.h> #include <asm/pgalloc.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/reboot.h> #include <asm/stackprotector.h> diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index ecb0d5450334..4988e19598c8 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -21,7 +21,6 @@ #include <xen/grant_table.h> #include <xen/xen.h> -#include <asm/pgtable.h> static struct gnttab_vm_area { struct vm_struct *area; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index bbba8b17829a..a58d9c69807a 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -51,13 +51,13 @@ #include <linux/memblock.h> #include <linux/seq_file.h> #include <linux/crash_dump.h> +#include <linux/pgtable.h> #ifdef CONFIG_KEXEC_CORE #include <linux/kexec.h> #endif #include <trace/events/xen.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/fixmap.h> #include <asm/mmu_context.h> diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index f2adb63b2d7c..8fa01c545460 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -23,10 +23,10 @@ #include <linux/nmi.h> #include <linux/cpuhotplug.h> #include <linux/stackprotector.h> +#include <linux/pgtable.h> #include <asm/paravirt.h> #include <asm/desc.h> -#include <asm/pgtable.h> #include <asm/cpu.h> #include <xen/interface/xen.h> |