Diffstat (limited to 'arch/arm64/kernel')
30 files changed, 639 insertions, 553 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 86364ab6f13f..ed65576ce710 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -17,7 +17,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ - syscall.o proton-pack.o + syscall.o proton-pack.o idreg-override.o targets += efi-entry.o @@ -59,9 +59,10 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_ARM64_MTE) += mte.o +obj-y += vdso-wrap.o +obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o -obj-y += vdso/ probes/ -obj-$(CONFIG_COMPAT_VDSO) += vdso32/ +obj-y += probes/ head-y := head.o extra-y += $(head-y) vmlinux.lds diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index a57cffb752e8..1184c44ea2c7 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -17,7 +17,7 @@ #include <asm/sections.h> #include <linux/stop_machine.h> -#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f) +#define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f) #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 301784463587..a36e2fc330d4 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -99,6 +99,9 @@ int main(void) DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); + DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val)); + DEFINE(FTR_OVR_MASK_OFFSET, offsetof(struct arm64_ftr_override, mask)); + BLANK(); #ifdef CONFIG_KVM DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a63428301f42..506a1cd37973 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -107,8 +107,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) } #ifdef CONFIG_ARM64_ERRATUM_1463225 -DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); - static bool has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry, int scope) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3e6331b64932..066030717a4c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -352,9 +352,12 @@ static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_END, }; +static struct arm64_ftr_override __ro_after_init no_override = { }; + struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { .name = "SYS_CTR_EL0", - .ftr_bits = ftr_ctr + .ftr_bits = ftr_ctr, + .override = &no_override, }; static const struct arm64_ftr_bits ftr_id_mmfr0[] = { @@ -544,13 +547,20 @@ static const struct arm64_ftr_bits ftr_raz[] = { ARM64_FTR_END, }; -#define ARM64_FTR_REG(id, table) { \ - .sys_id = id, \ - .reg = &(struct arm64_ftr_reg){ \ - .name = #id, \ - .ftr_bits = &((table)[0]), \ +#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) { \ + .sys_id = id, \ + .reg = &(struct arm64_ftr_reg){ \ + .name = #id, \ + .override = (ovr), \ + .ftr_bits = &((table)[0]), \ }} +#define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override) + +struct arm64_ftr_override __ro_after_init 
id_aa64mmfr1_override; +struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; +struct arm64_ftr_override __ro_after_init id_aa64isar1_override; + static const struct __ftr_reg_entry { u32 sys_id; struct arm64_ftr_reg *reg; @@ -585,7 +595,8 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, + &id_aa64pfr1_override), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0), /* Op1 = 0, CRn = 0, CRm = 5 */ @@ -594,11 +605,13 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), - ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, + &id_aa64isar1_override), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), - ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1, + &id_aa64mmfr1_override), ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), /* Op1 = 0, CRn = 1, CRm = 2 */ @@ -770,6 +783,33 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { u64 ftr_mask = arm64_ftr_mask(ftrp); s64 ftr_new = arm64_ftr_value(ftrp, new); + s64 ftr_ovr = arm64_ftr_value(ftrp, reg->override->val); + + if ((ftr_mask & reg->override->mask) == ftr_mask) { + s64 tmp = arm64_ftr_safe_value(ftrp, ftr_ovr, ftr_new); + char *str = NULL; + + if (ftr_ovr != tmp) { + /* Unsafe, remove the override */ + reg->override->mask &= ~ftr_mask; + reg->override->val &= ~ftr_mask; + tmp = ftr_ovr; + str = "ignoring override"; + } else if (ftr_new != tmp) { + /* Override was valid */ + ftr_new = tmp; + str = "forced"; + } else if (ftr_ovr == tmp) { + /* Override was the safe value */ + str = "already set"; + } + + if (str) + pr_warn("%s[%d:%d]: %s to %llx\n", + reg->name, + ftrp->shift + ftrp->width - 1, + ftrp->shift, str, tmp); + } val = arm64_ftr_set_value(ftrp, val, ftr_new); @@ -1115,14 +1155,17 @@ u64 read_sanitised_ftr_reg(u32 id) EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg); #define read_sysreg_case(r) \ - case r: return read_sysreg_s(r) + case r: val = read_sysreg_s(r); break; /* * __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated. * Read the system register on the current CPU */ -static u64 __read_sysreg_by_encoding(u32 sys_id) +u64 __read_sysreg_by_encoding(u32 sys_id) { + struct arm64_ftr_reg *regp; + u64 val; + switch (sys_id) { read_sysreg_case(SYS_ID_PFR0_EL1); read_sysreg_case(SYS_ID_PFR1_EL1); @@ -1165,6 +1208,14 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) BUG(); return 0; } + + regp = get_arm64_ftr_reg(sys_id); + if (regp) { + val &= ~regp->override->mask; + val |= (regp->override->val & regp->override->mask); + } + + return val; } #include <linux/irqchip/arm-gic-v3.h> @@ -1455,7 +1506,7 @@ static bool cpu_has_broken_dbm(void) /* List of CPUs which have broken DBM support. 
*/ static const struct midr_range cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_1024718 - MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), /* Kryo4xx Silver (rdpe => r1p0) */ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe), #endif diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 5346953e4382..9d3588450473 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -109,6 +109,55 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) exit_to_kernel_mode(regs); } +#ifdef CONFIG_ARM64_ERRATUM_1463225 +static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); + +static void cortex_a76_erratum_1463225_svc_handler(void) +{ + u32 reg, val; + + if (!unlikely(test_thread_flag(TIF_SINGLESTEP))) + return; + + if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225))) + return; + + __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1); + reg = read_sysreg(mdscr_el1); + val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE; + write_sysreg(val, mdscr_el1); + asm volatile("msr daifclr, #8"); + isb(); + + /* We will have taken a single-step exception by this point */ + + write_sysreg(reg, mdscr_el1); + __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0); +} + +static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +{ + if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa)) + return false; + + /* + * We've taken a dummy step exception from the kernel to ensure + * that interrupts are re-enabled on the syscall path. Return back + * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions + * masked so that we can safely restore the mdscr and get on with + * handling the syscall. + */ + regs->pstate |= PSR_D_BIT; + return true; +} +#else /* CONFIG_ARM64_ERRATUM_1463225 */ +static void cortex_a76_erratum_1463225_svc_handler(void) { } +static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_ARM64_ERRATUM_1463225 */ + static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -186,7 +235,8 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); arm64_enter_el1_dbg(regs); - do_debug_exception(far, esr, regs); + if (!cortex_a76_erratum_1463225_debug_handler(regs)) + do_debug_exception(far, esr, regs); arm64_exit_el1_dbg(regs); } @@ -362,6 +412,7 @@ static void noinstr el0_svc(struct pt_regs *regs) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); enter_from_user_mode(); + cortex_a76_erratum_1463225_svc_handler(); do_el0_svc(regs); } @@ -439,6 +490,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); enter_from_user_mode(); + cortex_a76_erratum_1463225_svc_handler(); do_el0_svc_compat(regs); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c9bae73f2621..a31a0a713c85 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -261,16 +261,16 @@ alternative_else_nop_endif stp lr, x21, [sp, #S_LR] /* - * In order to be able to dump the contents of struct pt_regs at the - * time the exception was taken (in case we attempt to walk the call - * stack later), chain it together with the stack frames. + * For exceptions from EL0, terminate the callchain here. + * For exceptions from EL1, create a synthetic frame record so the + * interrupted code shows up in the backtrace. 
*/ .if \el == 0 - stp xzr, xzr, [sp, #S_STACKFRAME] + mov x29, xzr .else stp x29, x22, [sp, #S_STACKFRAME] - .endif add x29, sp, #S_STACKFRAME + .endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN alternative_if_not ARM64_HAS_PAN @@ -805,7 +805,7 @@ SYM_CODE_END(ret_to_user) // Move from tramp_pg_dir to swapper_pg_dir .macro tramp_map_kernel, tmp mrs \tmp, ttbr1_el1 - add \tmp, \tmp, #(2 * PAGE_SIZE) + add \tmp, \tmp, #TRAMP_SWAPPER_OFFSET bic \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 @@ -825,7 +825,7 @@ alternative_else_nop_endif // Move from swapper_pg_dir to tramp_pg_dir .macro tramp_unmap_kernel, tmp mrs \tmp, ttbr1_el1 - sub \tmp, \tmp, #(2 * PAGE_SIZE) + sub \tmp, \tmp, #TRAMP_SWAPPER_OFFSET orr \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a0dc987724ed..1e30b5550d2a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -404,10 +404,6 @@ SYM_FUNC_START_LOCAL(__primary_switched) adr_l x5, init_task msr sp_el0, x5 // Save thread_info -#ifdef CONFIG_ARM64_PTR_AUTH - __ptrauth_keys_init_cpu x5, x6, x7, x8 -#endif - adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb @@ -436,10 +432,12 @@ SYM_FUNC_START_LOCAL(__primary_switched) #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) bl kasan_early_init #endif + mov x0, x21 // pass FDT address in x0 + bl early_fdt_map // Try mapping the FDT early + bl init_feature_override // Parse cpu feature overrides #ifdef CONFIG_RANDOMIZE_BASE tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? b.ne 0f - mov x0, x21 // pass FDT address in x0 bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? just proceed orr x23, x23, x0 // record KASLR offset @@ -447,6 +445,7 @@ SYM_FUNC_START_LOCAL(__primary_switched) ret // to __primary_switch() 0: #endif + bl switch_to_vhe // Prefer VHE if possible add sp, sp, #16 mov x29, #0 mov x30, #0 @@ -478,13 +477,14 @@ EXPORT_SYMBOL(kimage_vaddr) * booted in EL1 or EL2 respectively. */ SYM_FUNC_START(init_kernel_el) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 + mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq init_el2 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 isb mov_q x0, INIT_PSTATE_EL1 msr spsr_el1, x0 @@ -493,50 +493,11 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) eret SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) -#ifdef CONFIG_ARM64_VHE - /* - * Check for VHE being present. x2 being non-zero indicates that we - * do have VHE, and that the kernel is intended to run at EL2. - */ - mrs x2, id_aa64mmfr1_el1 - ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 -#else - mov x2, xzr -#endif - cbz x2, init_el2_nvhe - - /* - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. - */ - mov_q x0, HCR_HOST_VHE_FLAGS - msr hcr_el2, x0 - isb - - init_el2_state vhe - - isb - - mov_q x0, INIT_PSTATE_EL2 - msr spsr_el2, x0 - msr elr_el2, lr - mov w0, #BOOT_CPU_MODE_EL2 - eret - -SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL) - /* - * When VHE is not in use, early init of EL2 and EL1 needs to be - * done here. 
- */ - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 - mov_q x0, HCR_HOST_NVHE_FLAGS msr hcr_el2, x0 isb - init_el2_state nvhe + init_el2_state /* Hypervisor stub */ adr_l x0, __hyp_stub_vectors @@ -623,6 +584,7 @@ SYM_FUNC_START_LOCAL(secondary_startup) /* * Common entry point for secondary CPUs. */ + bl switch_to_vhe bl __cpu_secondary_check52bitva bl __cpu_setup // initialise processor adrp x1, swapper_pg_dir @@ -703,16 +665,9 @@ SYM_FUNC_START(__enable_mmu) offset_ttbr1 x1, x3 msr ttbr1_el1, x1 // load TTBR1 isb - msr sctlr_el1, x0 - isb - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb + + set_sctlr_el1 x0 + ret SYM_FUNC_END(__enable_mmu) @@ -883,11 +838,7 @@ SYM_FUNC_START_LOCAL(__primary_switch) tlbi vmalle1 // Remove any stale TLB entries dsb nsh - msr sctlr_el1, x19 // re-enable the MMU - isb - ic iallu // flush instructions fetched - dsb nsh // via old mapping - isb + set_sctlr_el1 x19 // re-enable the MMU bl __relocate_kernel #endif diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 9c9f47e9f7f4..b1cef371df2b 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -16,7 +16,6 @@ #define pr_fmt(x) "hibernate: " x #include <linux/cpu.h> #include <linux/kvm_host.h> -#include <linux/mm.h> #include <linux/pm.h> #include <linux/sched.h> #include <linux/suspend.h> @@ -31,13 +30,12 @@ #include <asm/memory.h> #include <asm/mmu_context.h> #include <asm/mte.h> -#include <asm/pgalloc.h> -#include <asm/pgtable-hwdef.h> #include <asm/sections.h> #include <asm/smp.h> #include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/sysreg.h> +#include <asm/trans_pgd.h> #include <asm/virt.h> /* @@ -178,52 +176,9 @@ int arch_hibernation_header_restore(void *addr) } EXPORT_SYMBOL(arch_hibernation_header_restore); -static int trans_pgd_map_page(pgd_t *trans_pgd, void *page, - unsigned long dst_addr, - pgprot_t pgprot) +static void *hibernate_page_alloc(void *arg) { - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - pgdp = pgd_offset_pgd(trans_pgd, dst_addr); - if (pgd_none(READ_ONCE(*pgdp))) { - pudp = (void *)get_safe_page(GFP_ATOMIC); - if (!pudp) - return -ENOMEM; - pgd_populate(&init_mm, pgdp, pudp); - } - - p4dp = p4d_offset(pgdp, dst_addr); - if (p4d_none(READ_ONCE(*p4dp))) { - pudp = (void *)get_safe_page(GFP_ATOMIC); - if (!pudp) - return -ENOMEM; - p4d_populate(&init_mm, p4dp, pudp); - } - - pudp = pud_offset(p4dp, dst_addr); - if (pud_none(READ_ONCE(*pudp))) { - pmdp = (void *)get_safe_page(GFP_ATOMIC); - if (!pmdp) - return -ENOMEM; - pud_populate(&init_mm, pudp, pmdp); - } - - pmdp = pmd_offset(pudp, dst_addr); - if (pmd_none(READ_ONCE(*pmdp))) { - ptep = (void *)get_safe_page(GFP_ATOMIC); - if (!ptep) - return -ENOMEM; - pmd_populate_kernel(&init_mm, pmdp, ptep); - } - - ptep = pte_offset_kernel(pmdp, dst_addr); - set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC)); - - return 0; + return (void *)get_safe_page((__force gfp_t)(unsigned long)arg); } /* @@ -239,11 +194,16 @@ static int trans_pgd_map_page(pgd_t *trans_pgd, void *page, * page system. 
*/ static int create_safe_exec_page(void *src_start, size_t length, - unsigned long dst_addr, phys_addr_t *phys_dst_addr) { + struct trans_pgd_info trans_info = { + .trans_alloc_page = hibernate_page_alloc, + .trans_alloc_arg = (__force void *)GFP_ATOMIC, + }; + void *page = (void *)get_safe_page(GFP_ATOMIC); - pgd_t *trans_pgd; + phys_addr_t trans_ttbr0; + unsigned long t0sz; int rc; if (!page) @@ -251,13 +211,7 @@ static int create_safe_exec_page(void *src_start, size_t length, memcpy(page, src_start, length); __flush_icache_range((unsigned long)page, (unsigned long)page + length); - - trans_pgd = (void *)get_safe_page(GFP_ATOMIC); - if (!trans_pgd) - return -ENOMEM; - - rc = trans_pgd_map_page(trans_pgd, page, dst_addr, - PAGE_KERNEL_EXEC); + rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page); if (rc) return rc; @@ -270,12 +224,15 @@ static int create_safe_exec_page(void *src_start, size_t length, * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI * runtime services), while for a userspace-driven test_resume cycle it * points to userspace page tables (and we must point it at a zero page - * ourselves). Elsewhere we only (un)install the idmap with preemption - * disabled, so T0SZ should be as required regardless. + * ourselves). + * + * We change T0SZ as part of installing the idmap. This is undone by + * cpu_uninstall_idmap() in __cpu_suspend_exit(). */ cpu_set_reserved_ttbr0(); local_flush_tlb_all(); - write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1); + __cpu_set_tcr_t0sz(t0sz); + write_sysreg(trans_ttbr0, ttbr0_el1); isb(); *phys_dst_addr = virt_to_phys(page); @@ -462,182 +419,6 @@ int swsusp_arch_suspend(void) return ret; } -static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) -{ - pte_t pte = READ_ONCE(*src_ptep); - - if (pte_valid(pte)) { - /* - * Resume will overwrite areas that may be marked - * read only (code, rodata). Clear the RDONLY bit from - * the temporary mappings we use during restore. - */ - set_pte(dst_ptep, pte_mkwrite(pte)); - } else if (debug_pagealloc_enabled() && !pte_none(pte)) { - /* - * debug_pagealloc will removed the PTE_VALID bit if - * the page isn't in use by the resume kernel. It may have - * been in use by the original kernel, in which case we need - * to put it back in our copy to do the restore. - * - * Before marking this entry valid, check the pfn should - * be mapped. 
- */ - BUG_ON(!pfn_valid(pte_pfn(pte))); - - set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte))); - } -} - -static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start, - unsigned long end) -{ - pte_t *src_ptep; - pte_t *dst_ptep; - unsigned long addr = start; - - dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC); - if (!dst_ptep) - return -ENOMEM; - pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep); - dst_ptep = pte_offset_kernel(dst_pmdp, start); - - src_ptep = pte_offset_kernel(src_pmdp, start); - do { - _copy_pte(dst_ptep, src_ptep, addr); - } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end); - - return 0; -} - -static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start, - unsigned long end) -{ - pmd_t *src_pmdp; - pmd_t *dst_pmdp; - unsigned long next; - unsigned long addr = start; - - if (pud_none(READ_ONCE(*dst_pudp))) { - dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC); - if (!dst_pmdp) - return -ENOMEM; - pud_populate(&init_mm, dst_pudp, dst_pmdp); - } - dst_pmdp = pmd_offset(dst_pudp, start); - - src_pmdp = pmd_offset(src_pudp, start); - do { - pmd_t pmd = READ_ONCE(*src_pmdp); - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - continue; - if (pmd_table(pmd)) { - if (copy_pte(dst_pmdp, src_pmdp, addr, next)) - return -ENOMEM; - } else { - set_pmd(dst_pmdp, - __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY)); - } - } while (dst_pmdp++, src_pmdp++, addr = next, addr != end); - - return 0; -} - -static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start, - unsigned long end) -{ - pud_t *dst_pudp; - pud_t *src_pudp; - unsigned long next; - unsigned long addr = start; - - if (p4d_none(READ_ONCE(*dst_p4dp))) { - dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC); - if (!dst_pudp) - return -ENOMEM; - p4d_populate(&init_mm, dst_p4dp, dst_pudp); - } - dst_pudp = pud_offset(dst_p4dp, start); - - src_pudp = pud_offset(src_p4dp, start); - do { - pud_t pud = READ_ONCE(*src_pudp); - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - continue; - if (pud_table(pud)) { - if (copy_pmd(dst_pudp, src_pudp, addr, next)) - return -ENOMEM; - } else { - set_pud(dst_pudp, - __pud(pud_val(pud) & ~PUD_SECT_RDONLY)); - } - } while (dst_pudp++, src_pudp++, addr = next, addr != end); - - return 0; -} - -static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start, - unsigned long end) -{ - p4d_t *dst_p4dp; - p4d_t *src_p4dp; - unsigned long next; - unsigned long addr = start; - - dst_p4dp = p4d_offset(dst_pgdp, start); - src_p4dp = p4d_offset(src_pgdp, start); - do { - next = p4d_addr_end(addr, end); - if (p4d_none(READ_ONCE(*src_p4dp))) - continue; - if (copy_pud(dst_p4dp, src_p4dp, addr, next)) - return -ENOMEM; - } while (dst_p4dp++, src_p4dp++, addr = next, addr != end); - - return 0; -} - -static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start, - unsigned long end) -{ - unsigned long next; - unsigned long addr = start; - pgd_t *src_pgdp = pgd_offset_k(start); - - dst_pgdp = pgd_offset_pgd(dst_pgdp, start); - do { - next = pgd_addr_end(addr, end); - if (pgd_none(READ_ONCE(*src_pgdp))) - continue; - if (copy_p4d(dst_pgdp, src_pgdp, addr, next)) - return -ENOMEM; - } while (dst_pgdp++, src_pgdp++, addr = next, addr != end); - - return 0; -} - -static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start, - unsigned long end) -{ - int rc; - pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC); - - if (!trans_pgd) { - pr_err("Failed to allocate memory for temporary page tables.\n"); - return -ENOMEM; - } - - rc = 
copy_page_tables(trans_pgd, start, end); - if (!rc) - *dst_pgdp = trans_pgd; - - return rc; -} - /* * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit(). * @@ -650,16 +431,20 @@ int swsusp_arch_resume(void) void *zero_page; size_t exit_size; pgd_t *tmp_pg_dir; - phys_addr_t phys_hibernate_exit; void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, void *, phys_addr_t, phys_addr_t); + struct trans_pgd_info trans_info = { + .trans_alloc_page = hibernate_page_alloc, + .trans_alloc_arg = (void *)GFP_ATOMIC, + }; /* * Restoring the memory image will overwrite the ttbr1 page tables. * Create a second copy of just the linear map, and use this when * restoring. */ - rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END); + rc = trans_pgd_create_copy(&trans_info, &tmp_pg_dir, PAGE_OFFSET, + PAGE_END); if (rc) return rc; @@ -673,19 +458,13 @@ int swsusp_arch_resume(void) return -ENOMEM; } - /* - * Locate the exit code in the bottom-but-one page, so that *NULL - * still has disastrous affects. - */ - hibernate_exit = (void *)PAGE_SIZE; exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; /* * Copy swsusp_arch_suspend_exit() to a safe page. This will generate * a new set of ttbr0 page tables and load them. */ rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size, - (unsigned long)hibernate_exit, - &phys_hibernate_exit); + (phys_addr_t *)&hibernate_exit); if (rc) { pr_err("Failed to create safe executable page for hibernate_exit code.\n"); return rc; @@ -704,7 +483,7 @@ int swsusp_arch_resume(void) * We can skip this step if we booted at EL1, or are running with VHE. */ if (el2_reset_needed()) { - phys_addr_t el2_vectors = phys_hibernate_exit; /* base */ + phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit; el2_vectors += hibernate_el2_vectors - __hibernate_exit_text_start; /* offset */ diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 160f5881a0b7..678cd2c618ee 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -8,9 +8,9 @@ #include <linux/init.h> #include <linux/linkage.h> -#include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> +#include <asm/el2_setup.h> #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> #include <asm/ptrace.h> @@ -47,10 +47,13 @@ SYM_CODE_END(__hyp_stub_vectors) SYM_CODE_START_LOCAL(el1_sync) cmp x0, #HVC_SET_VECTORS - b.ne 2f + b.ne 1f msr vbar_el2, x1 b 9f +1: cmp x0, #HVC_VHE_RESTART + b.eq mutate_to_vhe + 2: cmp x0, #HVC_SOFT_RESTART b.ne 3f mov x0, x2 @@ -70,6 +73,88 @@ SYM_CODE_START_LOCAL(el1_sync) eret SYM_CODE_END(el1_sync) +// nVHE? No way! Give me the real thing! +SYM_CODE_START_LOCAL(mutate_to_vhe) + // Be prepared to fail + mov_q x0, HVC_STUB_ERR + + // Sanity check: MMU *must* be off + mrs x1, sctlr_el2 + tbnz x1, #0, 1f + + // Needs to be VHE capable, obviously + mrs x1, id_aa64mmfr1_el1 + ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 + cbz x1, 1f + + // Check whether VHE is disabled from the command line + adr_l x1, id_aa64mmfr1_override + ldr x2, [x1, FTR_OVR_VAL_OFFSET] + ldr x1, [x1, FTR_OVR_MASK_OFFSET] + ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 + ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 + cmp x1, xzr + and x2, x2, x1 + csinv x2, x2, xzr, ne + cbz x2, 1f + + // Engage the VHE magic! 
+ mov_q x0, HCR_HOST_VHE_FLAGS + msr hcr_el2, x0 + isb + + // Use the EL1 allocated stack, per-cpu offset + mrs x0, sp_el1 + mov sp, x0 + mrs x0, tpidr_el1 + msr tpidr_el2, x0 + + // FP configuration, vectors + mrs_s x0, SYS_CPACR_EL12 + msr cpacr_el1, x0 + mrs_s x0, SYS_VBAR_EL12 + msr vbar_el1, x0 + + // Use EL2 translations for SPE and disable access from EL1 + mrs x0, mdcr_el2 + bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) + msr mdcr_el2, x0 + + // Transfer the MM state from EL1 to EL2 + mrs_s x0, SYS_TCR_EL12 + msr tcr_el1, x0 + mrs_s x0, SYS_TTBR0_EL12 + msr ttbr0_el1, x0 + mrs_s x0, SYS_TTBR1_EL12 + msr ttbr1_el1, x0 + mrs_s x0, SYS_MAIR_EL12 + msr mair_el1, x0 + isb + + // Invalidate TLBs before enabling the MMU + tlbi vmalle1 + dsb nsh + + // Enable the EL2 S1 MMU, as set up from EL1 + mrs_s x0, SYS_SCTLR_EL12 + set_sctlr_el1 x0 + + // Disable the EL1 S1 MMU for a good measure + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr_s SYS_SCTLR_EL12, x0 + + // Hack the exception return to stay at EL2 + mrs x0, spsr_el1 + and x0, x0, #~PSR_MODE_MASK + mov x1, #PSR_MODE_EL2h + orr x0, x0, x1 + msr spsr_el1, x0 + + mov x0, xzr + +1: eret +SYM_CODE_END(mutate_to_vhe) + .macro invalid_vector label SYM_CODE_START_LOCAL(\label) b \label @@ -85,6 +170,8 @@ SYM_CODE_END(\label) invalid_vector el1_fiq_invalid invalid_vector el1_error_invalid + .popsection + /* * __hyp_set_vectors: Call this after boot to set the initial hypervisor * vectors as part of hypervisor installation. On an SMP system, this should @@ -118,3 +205,27 @@ SYM_FUNC_START(__hyp_reset_vectors) hvc #0 ret SYM_FUNC_END(__hyp_reset_vectors) + +/* + * Entry point to switch to VHE if deemed capable + */ +SYM_FUNC_START(switch_to_vhe) +#ifdef CONFIG_ARM64_VHE + // Need to have booted at EL2 + adr_l x1, __boot_cpu_mode + ldr w0, [x1] + cmp w0, #BOOT_CPU_MODE_EL2 + b.ne 1f + + // and still be at EL1 + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL1 + b.ne 1f + + // Turn the world upside down + mov x0, #HVC_VHE_RESTART + hvc #0 +1: +#endif + ret +SYM_FUNC_END(switch_to_vhe) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c new file mode 100644 index 000000000000..dffb16682330 --- /dev/null +++ b/arch/arm64/kernel/idreg-override.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Early cpufeature override framework + * + * Copyright (C) 2020 Google LLC + * Author: Marc Zyngier <maz@kernel.org> + */ + +#include <linux/ctype.h> +#include <linux/kernel.h> +#include <linux/libfdt.h> + +#include <asm/cacheflush.h> +#include <asm/cpufeature.h> +#include <asm/setup.h> + +#define FTR_DESC_NAME_LEN 20 +#define FTR_DESC_FIELD_LEN 10 +#define FTR_ALIAS_NAME_LEN 30 +#define FTR_ALIAS_OPTION_LEN 80 + +struct ftr_set_desc { + char name[FTR_DESC_NAME_LEN]; + struct arm64_ftr_override *override; + struct { + char name[FTR_DESC_FIELD_LEN]; + u8 shift; + } fields[]; +}; + +static const struct ftr_set_desc mmfr1 __initconst = { + .name = "id_aa64mmfr1", + .override = &id_aa64mmfr1_override, + .fields = { + { "vh", ID_AA64MMFR1_VHE_SHIFT }, + {} + }, +}; + +static const struct ftr_set_desc pfr1 __initconst = { + .name = "id_aa64pfr1", + .override = &id_aa64pfr1_override, + .fields = { + { "bt", ID_AA64PFR1_BT_SHIFT }, + {} + }, +}; + +static const struct ftr_set_desc isar1 __initconst = { + .name = "id_aa64isar1", + .override = &id_aa64isar1_override, + .fields = { + { "gpi", ID_AA64ISAR1_GPI_SHIFT }, + { "gpa", ID_AA64ISAR1_GPA_SHIFT }, + { "api", ID_AA64ISAR1_API_SHIFT }, + { "apa", ID_AA64ISAR1_APA_SHIFT }, + 
{} + }, +}; + +extern struct arm64_ftr_override kaslr_feature_override; + +static const struct ftr_set_desc kaslr __initconst = { + .name = "kaslr", +#ifdef CONFIG_RANDOMIZE_BASE + .override = &kaslr_feature_override, +#endif + .fields = { + { "disabled", 0 }, + {} + }, +}; + +static const struct ftr_set_desc * const regs[] __initconst = { + &mmfr1, + &pfr1, + &isar1, + &kaslr, +}; + +static const struct { + char alias[FTR_ALIAS_NAME_LEN]; + char feature[FTR_ALIAS_OPTION_LEN]; +} aliases[] __initconst = { + { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, + { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" }, + { "arm64.nobti", "id_aa64pfr1.bt=0" }, + { "arm64.nopauth", + "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " + "id_aa64isar1.api=0 id_aa64isar1.apa=0" }, + { "nokaslr", "kaslr.disabled=1" }, +}; + +static int __init find_field(const char *cmdline, + const struct ftr_set_desc *reg, int f, u64 *v) +{ + char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2]; + int len; + + len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=", + reg->name, reg->fields[f].name); + + if (!parameqn(cmdline, opt, len)) + return -1; + + return kstrtou64(cmdline + len, 0, v); +} + +static void __init match_options(const char *cmdline) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(regs); i++) { + int f; + + if (!regs[i]->override) + continue; + + for (f = 0; strlen(regs[i]->fields[f].name); f++) { + u64 shift = regs[i]->fields[f].shift; + u64 mask = 0xfUL << shift; + u64 v; + + if (find_field(cmdline, regs[i], f, &v)) + continue; + + regs[i]->override->val &= ~mask; + regs[i]->override->val |= (v << shift) & mask; + regs[i]->override->mask |= mask; + + return; + } + } +} + +static __init void __parse_cmdline(const char *cmdline, bool parse_aliases) +{ + do { + char buf[256]; + size_t len; + int i; + + cmdline = skip_spaces(cmdline); + + for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++); + if (!len) + return; + + len = min(len, ARRAY_SIZE(buf) - 1); + strncpy(buf, cmdline, len); + buf[len] = 0; + + if (strcmp(buf, "--") == 0) + return; + + cmdline += len; + + match_options(buf); + + for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++) + if (parameq(buf, aliases[i].alias)) + __parse_cmdline(aliases[i].feature, false); + } while (1); +} + +static __init void parse_cmdline(void) +{ + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + const u8 *prop; + void *fdt; + int node; + + fdt = get_early_fdt_ptr(); + if (!fdt) + goto out; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + goto out; + + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + goto out; + + __parse_cmdline(prop, true); + + if (!IS_ENABLED(CONFIG_CMDLINE_EXTEND)) + return; + } + +out: + __parse_cmdline(CONFIG_CMDLINE, true); +} + +/* Keep checkers quiet */ +void init_feature_override(void); + +asmlinkage void __init init_feature_override(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(regs); i++) { + if (regs[i]->override) { + regs[i]->override->val = 0; + regs[i]->override->mask = 0; + } + } + + parse_cmdline(); + + for (i = 0; i < ARRAY_SIZE(regs); i++) { + if (regs[i]->override) + __flush_dcache_area(regs[i]->override, + sizeof(*regs[i]->override)); + } +} diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 1c74c45b9494..27f8939deb1b 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -19,6 +19,7 @@ #include <asm/memory.h> #include <asm/mmu.h> #include <asm/sections.h> +#include <asm/setup.h> enum kaslr_status { KASLR_ENABLED, @@ -50,39 +51,7 @@ static __init u64 get_kaslr_seed(void *fdt) 
return ret; } -static __init bool cmdline_contains_nokaslr(const u8 *cmdline) -{ - const u8 *str; - - str = strstr(cmdline, "nokaslr"); - return str == cmdline || (str > cmdline && *(str - 1) == ' '); -} - -static __init bool is_kaslr_disabled_cmdline(void *fdt) -{ - if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { - int node; - const u8 *prop; - - node = fdt_path_offset(fdt, "/chosen"); - if (node < 0) - goto out; - - prop = fdt_getprop(fdt, node, "bootargs", NULL); - if (!prop) - goto out; - - if (cmdline_contains_nokaslr(prop)) - return true; - - if (IS_ENABLED(CONFIG_CMDLINE_EXTEND)) - goto out; - - return false; - } -out: - return cmdline_contains_nokaslr(CONFIG_CMDLINE); -} +struct arm64_ftr_override kaslr_feature_override __initdata; /* * This routine will be executed with the kernel mapped at its default virtual @@ -92,12 +61,11 @@ out: * containing function pointers) to be reinitialized, and zero-initialized * .bss variables will be reset to 0. */ -u64 __init kaslr_early_init(u64 dt_phys) +u64 __init kaslr_early_init(void) { void *fdt; u64 seed, offset, mask, module_range; unsigned long raw; - int size; /* * Set a reasonable default for module_alloc_base in case @@ -111,8 +79,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * and proceed with KASLR disabled. We will make another * attempt at mapping the FDT in setup_machine() */ - early_fixmap_init(); - fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); + fdt = get_early_fdt_ptr(); if (!fdt) { kaslr_status = KASLR_DISABLED_FDT_REMAP; return 0; @@ -127,7 +94,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * Check if 'nokaslr' appears on the command line, and * return 0 if that is the case. */ - if (is_kaslr_disabled_cmdline(fdt)) { + if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) { kaslr_status = KASLR_DISABLED_CMDLINE; return 0; } diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index a0b144cfaea7..90a335c74442 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -42,6 +42,7 @@ static void _kexec_image_info(const char *func, int line, pr_debug(" start: %lx\n", kimage->start); pr_debug(" head: %lx\n", kimage->head); pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc); for (i = 0; i < kimage->nr_segments; i++) { pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", @@ -58,6 +59,23 @@ void machine_kexec_cleanup(struct kimage *kimage) /* Empty routine needed to avoid build errors. */ } +int machine_kexec_post_load(struct kimage *kimage) +{ + void *reloc_code = page_to_virt(kimage->control_code_page); + + memcpy(reloc_code, arm64_relocate_new_kernel, + arm64_relocate_new_kernel_size); + kimage->arch.kern_reloc = __pa(reloc_code); + kexec_image_info(kimage); + + /* Flush the reloc_code in preparation for its execution. */ + __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size); + flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code + + arm64_relocate_new_kernel_size); + + return 0; +} + /** * machine_kexec_prepare - Prepare for a kexec reboot. 
* @@ -67,8 +85,6 @@ void machine_kexec_cleanup(struct kimage *kimage) */ int machine_kexec_prepare(struct kimage *kimage) { - kexec_image_info(kimage); - if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { pr_err("Can't kexec: CPUs are stuck in the kernel.\n"); return -EBUSY; @@ -143,8 +159,6 @@ static void kexec_segment_flush(const struct kimage *kimage) */ void machine_kexec(struct kimage *kimage) { - phys_addr_t reboot_code_buffer_phys; - void *reboot_code_buffer; bool in_kexec_crash = (kimage == kexec_crash_image); bool stuck_cpus = cpus_are_stuck_in_kernel(); @@ -155,31 +169,6 @@ void machine_kexec(struct kimage *kimage) WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), "Some CPUs may be stale, kdump will be unreliable.\n"); - reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); - reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys); - - kexec_image_info(kimage); - - /* - * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use - * after the kernel is shut down. - */ - memcpy(reboot_code_buffer, arm64_relocate_new_kernel, - arm64_relocate_new_kernel_size); - - /* Flush the reboot_code_buffer in preparation for its execution. */ - __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); - - /* - * Although we've killed off the secondary CPUs, we don't update - * the online mask if we're handling a crash kernel and consequently - * need to avoid flush_icache_range(), which will attempt to IPI - * the offline CPUs. Therefore, we must use the __* variant here. - */ - __flush_icache_range((uintptr_t)reboot_code_buffer, - (uintptr_t)reboot_code_buffer + - arm64_relocate_new_kernel_size); - /* Flush the kimage list and its buffers. */ kexec_list_flush(kimage); @@ -193,7 +182,7 @@ void machine_kexec(struct kimage *kimage) /* * cpu_soft_restart will shutdown the MMU, disable data caches, then - * transfer control to the reboot_code_buffer which contains a copy of + * transfer control to the kern_reloc which contains a copy of * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel * uses physical addressing to relocate the new image to its final * position and transfers control to the image entry point when the @@ -203,12 +192,8 @@ void machine_kexec(struct kimage *kimage) * userspace (kexec-tools). * In kexec_file case, the kernel starts directly without purgatory. */ - cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start, -#ifdef CONFIG_KEXEC_FILE - kimage->arch.dtb_mem); -#else - 0); -#endif + cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start, + kimage->arch.dtb_mem); BUG(); /* Should never get here. */ } diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 2e224435c024..e53493d8b208 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -131,7 +131,7 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs, } #endif -#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b)) +#define cmp_3way(a, b) ((a) < (b) ? 
-1 : (a) > (b)) static int cmp_rela(const void *a, const void *b) { diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 3605f77ad4df..7d2318f80955 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -280,7 +280,7 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj, return 0; } -static struct attribute_group armv8_pmuv3_events_attr_group = { +static const struct attribute_group armv8_pmuv3_events_attr_group = { .name = "events", .attrs = armv8_pmuv3_event_attrs, .is_visible = armv8pmu_event_attr_is_visible, @@ -300,7 +300,7 @@ static struct attribute *armv8_pmuv3_format_attrs[] = { NULL, }; -static struct attribute_group armv8_pmuv3_format_attr_group = { +static const struct attribute_group armv8_pmuv3_format_attr_group = { .name = "format", .attrs = armv8_pmuv3_format_attrs, }; @@ -322,7 +322,7 @@ static struct attribute *armv8_pmuv3_caps_attrs[] = { NULL, }; -static struct attribute_group armv8_pmuv3_caps_attr_group = { +static const struct attribute_group armv8_pmuv3_caps_attr_group = { .name = "caps", .attrs = armv8_pmuv3_caps_attrs, }; @@ -810,7 +810,7 @@ static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, { int idx; - for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) { + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) { if (!test_and_set_bit(idx, cpuc->used_mask)) return idx; } @@ -1188,6 +1188,12 @@ static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmuv3_map_event); } +static int armv8_a78_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a78", + armv8_pmuv3_map_event); +} + static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu) { return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1", @@ -1225,6 +1231,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a75-pmu", .data = armv8_a75_pmu_init}, {.compatible = "arm,cortex-a76-pmu", .data = armv8_a76_pmu_init}, {.compatible = "arm,cortex-a77-pmu", .data = armv8_a77_pmu_init}, + {.compatible = "arm,cortex-a78-pmu", .data = armv8_a78_pmu_init}, {.compatible = "arm,neoverse-e1-pmu", .data = armv8_e1_pmu_init}, {.compatible = "arm,neoverse-n1-pmu", .data = armv8_n1_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6616486a58fe..4cc1ccc8d6ab 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -304,7 +304,7 @@ void __show_regs(struct pt_regs *regs) } } -void show_regs(struct pt_regs * regs) +void show_regs(struct pt_regs *regs) { __show_regs(regs); dump_backtrace(regs, NULL, KERN_DEFAULT); @@ -587,7 +587,7 @@ unsigned long get_wchan(struct task_struct *p) ret = frame.pc; goto out; } - } while (count ++ < 16); + } while (count++ < 16); out: put_task_stack(p); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 8ac487c84e37..3d5c8afca75b 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -194,6 +194,7 @@ static void ptrace_hbptriggered(struct perf_event *bp, } arm64_force_sig_ptrace_errno_trap(si_errno, bkpt->trigger, desc); + return; } #endif arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc); diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 84eec95ec06c..b78ea5de97a4 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -17,28 +17,24 @@ /* * 
arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. * - * The memory that the old kernel occupies may be overwritten when coping the + * The memory that the old kernel occupies may be overwritten when copying the * new image to its final location. To assure that the * arm64_relocate_new_kernel routine which does that copy is not overwritten, * all code and data needed by arm64_relocate_new_kernel must be between the * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec - * control_code_page, a special page which has been set up to be preserved - * during the copy operation. + * safe memory that has been set up to be preserved during the copy operation. */ SYM_CODE_START(arm64_relocate_new_kernel) - /* Setup the list loop variables. */ mov x18, x2 /* x18 = dtb address */ mov x17, x1 /* x17 = kimage_start */ mov x16, x0 /* x16 = kimage_head */ - raw_dcache_line_size x15, x0 /* x15 = dcache line size */ mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ - /* Check if the new image needs relocation. */ tbnz x16, IND_DONE_BIT, .Ldone - + raw_dcache_line_size x15, x1 /* x15 = dcache line size */ .Lloop: and x12, x16, PAGE_MASK /* x12 = addr */ @@ -47,44 +43,28 @@ SYM_CODE_START(arm64_relocate_new_kernel) tbz x16, IND_SOURCE_BIT, .Ltest_indirection /* Invalidate dest page to PoC. */ - mov x0, x13 - add x20, x0, #PAGE_SIZE + mov x2, x13 + add x20, x2, #PAGE_SIZE sub x1, x15, #1 - bic x0, x0, x1 -2: dc ivac, x0 - add x0, x0, x15 - cmp x0, x20 + bic x2, x2, x1 +2: dc ivac, x2 + add x2, x2, x15 + cmp x2, x20 b.lo 2b dsb sy - mov x20, x13 - mov x21, x12 - copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7 - - /* dest += PAGE_SIZE */ - add x13, x13, PAGE_SIZE + copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8 b .Lnext - .Ltest_indirection: tbz x16, IND_INDIRECTION_BIT, .Ltest_destination - - /* ptr = addr */ - mov x14, x12 + mov x14, x12 /* ptr = addr */ b .Lnext - .Ltest_destination: tbz x16, IND_DESTINATION_BIT, .Lnext - - /* dest = addr */ - mov x13, x12 - + mov x13, x12 /* dest = addr */ .Lnext: - /* entry = *ptr++ */ - ldr x16, [x14], #8 - - /* while (!(entry & DONE)) */ - tbz x16, IND_DONE_BIT, .Lloop - + ldr x16, [x14], #8 /* entry = *ptr++ */ + tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */ .Ldone: /* wait for writes from copy_page to finish */ dsb nsh diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index c18aacde8bb0..61845c0821d9 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -168,6 +168,21 @@ static void __init smp_build_mpidr_hash(void) pr_warn("Large number of MPIDR hash buckets detected\n"); } +static void *early_fdt_ptr __initdata; + +void __init *get_early_fdt_ptr(void) +{ + return early_fdt_ptr; +} + +asmlinkage void __init early_fdt_map(u64 dt_phys) +{ + int fdt_size; + + early_fixmap_init(); + early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL); +} + static void __init setup_machine_fdt(phys_addr_t dt_phys) { int size; diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 6bdef7362c0e..5bfd9b87f85d 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -100,6 +100,7 @@ SYM_FUNC_END(__cpu_suspend_enter) .pushsection ".idmap.text", "awx" SYM_CODE_START(cpu_resume) bl init_kernel_el + bl switch_to_vhe bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ adrp x1, swapper_pg_dir diff --git 
a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index fa56af1a59c3..0fb42129b469 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -44,6 +44,10 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) unsigned long fp = frame->fp; struct stack_info info; + /* Terminal record; nothing to unwind */ + if (!fp) + return -EINVAL; + if (fp & 0xf) return -EINVAL; @@ -104,15 +108,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->pc = ptrauth_strip_insn_pac(frame->pc); - /* - * Frames created upon entry from EL0 have NULL FP and PC values, so - * don't bother reporting these. Frames created by __noreturn functions - * might have a valid FP even if PC is bogus, so only terminate where - * both are NULL. - */ - if (!frame->fp && !frame->pc) - return -EINVAL; - return 0; } NOKPROBE_SYMBOL(unwind_frame); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index c2877c332f2d..b9cf12b271d7 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -65,35 +65,6 @@ static inline bool has_syscall_work(unsigned long flags) int syscall_trace_enter(struct pt_regs *regs); void syscall_trace_exit(struct pt_regs *regs); -#ifdef CONFIG_ARM64_ERRATUM_1463225 -DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); - -static void cortex_a76_erratum_1463225_svc_handler(void) -{ - u32 reg, val; - - if (!unlikely(test_thread_flag(TIF_SINGLESTEP))) - return; - - if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225))) - return; - - __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1); - reg = read_sysreg(mdscr_el1); - val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE; - write_sysreg(val, mdscr_el1); - asm volatile("msr daifclr, #8"); - isb(); - - /* We will have taken a single-step exception by this point */ - - write_sysreg(reg, mdscr_el1); - __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0); -} -#else -static void cortex_a76_erratum_1463225_svc_handler(void) { } -#endif /* CONFIG_ARM64_ERRATUM_1463225 */ - static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, const syscall_fn_t syscall_table[]) { @@ -120,7 +91,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * (Similarly for HVC and SMC elsewhere.) 
*/ - cortex_a76_erratum_1463225_svc_handler(); local_daif_restore(DAIF_PROCCTX); if (flags & _TIF_MTE_ASYNC_FAULT) { diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index f6faa697e83e..e08a4126453a 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -199,76 +199,38 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) return 0; } -static inline bool -enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus) -{ - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - - if (!policy) { - pr_debug("CPU%d: No cpufreq policy found.\n", cpu); - return false; - } - - if (cpumask_subset(policy->related_cpus, valid_cpus)) - cpumask_or(amu_fie_cpus, policy->related_cpus, - amu_fie_cpus); - - cpufreq_cpu_put(policy); - - return true; -} - static DEFINE_STATIC_KEY_FALSE(amu_fie_key); #define amu_freq_invariant() static_branch_unlikely(&amu_fie_key) -static int __init init_amu_fie(void) +static void amu_fie_setup(const struct cpumask *cpus) { - bool invariance_status = topology_scale_freq_invariant(); - cpumask_var_t valid_cpus; - bool have_policy = false; - int ret = 0; + bool invariant; int cpu; - if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL)) - return -ENOMEM; - - if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) { - ret = -ENOMEM; - goto free_valid_mask; - } + /* We are already set since the last insmod of cpufreq driver */ + if (unlikely(cpumask_subset(cpus, amu_fie_cpus))) + return; - for_each_present_cpu(cpu) { + for_each_cpu(cpu, cpus) { if (!freq_counters_valid(cpu) || freq_inv_set_max_ratio(cpu, cpufreq_get_hw_max_freq(cpu) * 1000, arch_timer_get_rate())) - continue; - - cpumask_set_cpu(cpu, valid_cpus); - have_policy |= enable_policy_freq_counters(cpu, valid_cpus); + return; } - /* - * If we are not restricted by cpufreq policies, we only enable - * the use of the AMU feature for FIE if all CPUs support AMU. - * Otherwise, enable_policy_freq_counters has already enabled - * policy cpus. - */ - if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask)) - cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus); + cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus); - if (!cpumask_empty(amu_fie_cpus)) { - pr_info("CPUs[%*pbl]: counters will be used for FIE.", - cpumask_pr_args(amu_fie_cpus)); - static_branch_enable(&amu_fie_key); - } + invariant = topology_scale_freq_invariant(); - /* - * If the system is not fully invariant after AMU init, disable - * partial use of counters for frequency invariance. - */ - if (!topology_scale_freq_invariant()) - static_branch_disable(&amu_fie_key); + /* We aren't fully invariant yet */ + if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask)) + return; + + static_branch_enable(&amu_fie_key); + + pr_debug("CPUs[%*pbl]: counters will be used for FIE.", + cpumask_pr_args(cpus)); /* * Task scheduler behavior depends on frequency invariance support, @@ -276,15 +238,50 @@ static int __init init_amu_fie(void) * a result of counter initialisation and use, retrigger the build of * scheduling domains to ensure the information is propagated properly. 
*/ - if (invariance_status != topology_scale_freq_invariant()) + if (!invariant) rebuild_sched_domains_energy(); +} + +static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_policy *policy = data; + + if (val == CPUFREQ_CREATE_POLICY) + amu_fie_setup(policy->related_cpus); + + /* + * We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU + * counters don't have any dependency on cpufreq driver once we have + * initialized AMU support and enabled invariance. The AMU counters will + * keep on working just fine in the absence of the cpufreq driver, and + * for the CPUs for which there are no counters available, the last set + * value of freq_scale will remain valid as that is the frequency those + * CPUs are running at. + */ + + return 0; +} + +static struct notifier_block init_amu_fie_notifier = { + .notifier_call = init_amu_fie_callback, +}; + +static int __init init_amu_fie(void) +{ + int ret; + + if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) + return -ENOMEM; -free_valid_mask: - free_cpumask_var(valid_cpus); + ret = cpufreq_register_notifier(&init_amu_fie_notifier, + CPUFREQ_POLICY_NOTIFIER); + if (ret) + free_cpumask_var(amu_fie_cpus); return ret; } -late_initcall_sync(init_amu_fie); +core_initcall(init_amu_fie); bool arch_freq_counters_available(const struct cpumask *cpus) { diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 6895ce777e7f..a05d34f0e82a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -45,7 +45,7 @@ #include <asm/system_misc.h> #include <asm/sysreg.h> -static const char *handler[]= { +static const char *handler[] = { "Synchronous Abort", "IRQ", "FIQ", diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso-wrap.S index c4b1990bf2be..c4b1990bf2be 100644 --- a/arch/arm64/kernel/vdso/vdso.S +++ b/arch/arm64/kernel/vdso-wrap.S diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index cd9c3fa25902..76c0255ecc91 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -44,7 +44,6 @@ endif # Disable gcov profiling for VDSO code GCOV_PROFILE := n -obj-y += vdso.o targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh index 8b806eacd0a6..0387209d65b1 100755 --- a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh +++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh @@ -13,4 +13,4 @@ LC_ALL=C sed -n -e 's/^00*/0/' -e \ -'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p' +'s/^\([0-9a-fA-F]*\) . 
VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2 0x\1/p' diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32-wrap.S index e72ac7bc4c04..e72ac7bc4c04 100644 --- a/arch/arm64/kernel/vdso32/vdso.S +++ b/arch/arm64/kernel/vdso32-wrap.S diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index a1e0f91e6cea..789ad420f16b 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -155,7 +155,6 @@ c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday)) asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso)) obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) -obj-y += vdso.o targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 4c0b0c89ad59..68f76a96c60b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -316,3 +316,11 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, * If padding is applied before .head.text, virt<->phys conversions will fail. */ ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned") + +ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET, + "RESERVED_SWAPPER_OFFSET is wrong!") + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET, + "TRAMP_SWAPPER_OFFSET is wrong!") #endif
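
Note on the override plumbing added above (cpufeature.c, idreg-override.c): as a rough standalone illustration only, not part of the patch, the C sketch below mimics how an arm64_ftr_override val/mask pair is folded into a raw ID register value, in the same way __read_sysreg_by_encoding() applies it after this series. The helper name apply_ftr_override() and the user-space harness are invented for the example, and the VHE field position (bits [11:8] of ID_AA64MMFR1_EL1) is assumed from the architecture.

    /*
     * Standalone sketch: apply an override (val/mask) to a raw register
     * value, mirroring the masking done in __read_sysreg_by_encoding().
     */
    #include <stdint.h>
    #include <stdio.h>

    struct arm64_ftr_override {
            uint64_t val;
            uint64_t mask;
    };

    /* Hypothetical helper, not a kernel API. */
    static uint64_t apply_ftr_override(uint64_t reg,
                                       const struct arm64_ftr_override *ovr)
    {
            reg &= ~ovr->mask;              /* clear the overridden fields */
            reg |= ovr->val & ovr->mask;    /* insert the forced values    */
            return reg;
    }

    int main(void)
    {
            /* e.g. "id_aa64mmfr1.vh=0": force the 4-bit VHE field to 0 */
            struct arm64_ftr_override vh_off = {
                    .val  = 0x0ULL << 8,
                    .mask = 0xfULL << 8,
            };
            uint64_t mmfr1 = 0x0000000000000100ULL; /* VHE advertised */

            printf("0x%016llx\n",
                   (unsigned long long)apply_ftr_override(mmfr1, &vh_off));
            return 0;
    }

Booting with id_aa64mmfr1.vh=0 (or the kvm-arm.mode=nvhe alias from the table in idreg-override.c) populates exactly such a val/mask pair, which is why mutate_to_vhe then bails out and the kernel stays at EL1 even on VHE-capable hardware.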