diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-10 05:29:27 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-10 05:29:27 -0700 |
commit | b970afcfcabd63cd3832e95db096439c177c3592 (patch) | |
tree | b63e662c780e02617916f4c0269e2adddc67f5a0 /arch/powerpc/kernel | |
parent | 8ea5b2abd07e2280a332bd9c1a7f4dd15b9b6c13 (diff) | |
parent | 8150a153c013aa2dd1ffae43370b89ac1347a7fb (diff) | |
download | lwn-b970afcfcabd63cd3832e95db096439c177c3592.tar.gz lwn-b970afcfcabd63cd3832e95db096439c177c3592.zip |
Merge tag 'powerpc-5.2-1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
"Slightly delayed due to the issue with printk() calling
probe_kernel_read() interacting with our new user access prevention
stuff, but all fixed now.
The only out-of-area changes are the addition of a cpuhp_state, small
additions to Documentation and MAINTAINERS updates.
Highlights:
- Support for Kernel Userspace Access/Execution Prevention (like
SMAP/SMEP/PAN/PXN) on some 64-bit and 32-bit CPUs. This prevents
the kernel from accidentally accessing userspace outside
copy_to/from_user(), or ever executing userspace.
- KASAN support on 32-bit.
- Rework of where we map the kernel, vmalloc, etc. on 64-bit hash to
use the same address ranges we use with the Radix MMU.
- A rewrite into C of large parts of our idle handling code for
64-bit Book3S (ie. power8 & power9).
- A fast path entry for syscalls on 32-bit CPUs, for a 12-17% speedup
in the null_syscall benchmark.
- On 64-bit bare metal we have support for recovering from errors
with the time base (our clocksource), however if that fails
currently we hang in __delay() and never crash. We now have support
for detecting that case and short circuiting __delay() so we at
least panic() and reboot.
- Add support for optionally enabling the DAWR on Power9, which had
to be disabled by default due to a hardware erratum. This has the
effect of enabling hardware breakpoints for GDB, the downside is a
badly behaved program could crash the machine by pointing the DAWR
at cache inhibited memory. This is opt-in obviously.
- xmon, our crash handler, gets support for a read only mode where
operations that could change memory or otherwise disturb the system
are disabled.
Plus many clean-ups, reworks and minor fixes etc.
Thanks to: Christophe Leroy, Akshay Adiga, Alastair D'Silva, Alexey
Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Anju T Sudhakar,
Anton Blanchard, Ben Hutchings, Bo YU, Breno Leitao, Cédric Le Goater,
Christopher M. Riedl, Christoph Hellwig, Colin Ian King, David Gibson,
Ganesh Goudar, Gautham R. Shenoy, George Spelvin, Greg Kroah-Hartman,
Greg Kurz, Horia Geantă, Jagadeesh Pagadala, Joel Stanley, Joe
Perches, Julia Lawall, Laurentiu Tudor, Laurent Vivier, Lukas Bulwahn,
Madhavan Srinivasan, Mahesh Salgaonkar, Mathieu Malaterre, Michael
Neuling, Mukesh Ojha, Nathan Fontenot, Nathan Lynch, Nicholas Piggin,
Nick Desaulniers, Oliver O'Halloran, Peng Hao, Qian Cai, Ravi
Bangoria, Rick Lindsley, Russell Currey, Sachin Sant, Stewart Smith,
Sukadev Bhattiprolu, Thomas Huth, Tobin C. Harding, Tyrel Datwyler,
Valentin Schneider, Wei Yongjun, Wen Yang, YueHaibing"
* tag 'powerpc-5.2-1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (205 commits)
powerpc/64s: Use early_mmu_has_feature() in set_kuap()
powerpc/book3s/64: check for NULL pointer in pgd_alloc()
powerpc/mm: Fix hugetlb page initialization
ocxl: Fix return value check in afu_ioctl()
powerpc/mm: fix section mismatch for setup_kup()
powerpc/mm: fix redundant inclusion of pgtable-frag.o in Makefile
powerpc/mm: Fix makefile for KASAN
powerpc/kasan: add missing/lost Makefile
selftests/powerpc: Add a signal fuzzer selftest
powerpc/booke64: set RI in default MSR
ocxl: Provide global MMIO accessors for external drivers
ocxl: move event_fd handling to frontend
ocxl: afu_irq only deals with IRQ IDs, not offsets
ocxl: Allow external drivers to use OpenCAPI contexts
ocxl: Create a clear delineation between ocxl backend & frontend
ocxl: Don't pass pci_dev around
ocxl: Split pci.c
ocxl: Remove some unused exported symbols
ocxl: Remove superfluous 'extern' from headers
ocxl: read_pasid never returns an error, so make it void
...
Diffstat (limited to 'arch/powerpc/kernel')
40 files changed, 1512 insertions, 1878 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index cddadccf551d..0ea6c4aa3a20 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -31,6 +31,18 @@ CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE) endif +KASAN_SANITIZE_early_32.o := n +KASAN_SANITIZE_cputable.o := n +KASAN_SANITIZE_prom_init.o := n +KASAN_SANITIZE_btext.o := n + +ifdef CONFIG_KASAN +CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING +endif + obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ process.o systbl.o idle.o \ @@ -93,7 +105,7 @@ extra-y += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o -obj-$(CONFIG_PPC32) += entry_32.o setup_32.o +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_BOOTX_TEXT) += btext.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 86a61e5f8285..8e02444e9d3d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -147,6 +147,9 @@ int main(void) #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu); #endif +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) + OFFSET(KUAP, thread_struct, kuap); +#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM OFFSET(PACATMSCRATCH, paca_struct, tm_scratch); @@ -268,7 +271,6 @@ int main(void) OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime); OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime); OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); - OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost); OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); #else /* CONFIG_PPC64 */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE @@ -332,6 +334,10 @@ int main(void) STACK_PT_REGS_OFFSET(_PPR, ppr); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_KUAP + STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); +#endif + #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); @@ -766,23 +772,6 @@ int main(void) OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl); #endif -#ifdef CONFIG_PPC_POWERNV - OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr); - OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state); - OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask); - OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); - OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); - OFFSET(PACA_DONT_STOP, paca_struct, dont_stop); -#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) - STOP_SPR(STOP_PID, pid); - STOP_SPR(STOP_LDBAR, ldbar); - STOP_SPR(STOP_FSCR, fscr); - STOP_SPR(STOP_HFSCR, hfscr); - STOP_SPR(STOP_MMCR1, mmcr1); - STOP_SPR(STOP_MMCR2, mmcr2); - STOP_SPR(STOP_MMCRA, mmcra); -#endif - DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 53102764fd2f..f2ed3ef4b129 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -759,23 +759,22 @@ static void cacheinfo_create_index_dir(struct cache *cache, int index, index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); if (!index_dir) - goto err; + return; index_dir->cache = cache; rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, cache_dir->kobj, "index%d", index); - if (rc) - goto err; + if (rc) { + kobject_put(&index_dir->kobj); + kfree(index_dir); + return; + } index_dir->next = cache_dir->index; cache_dir->index = index_dir; cacheinfo_create_index_opt_attrs(index_dir); - - return; -err: - kfree(index_dir); } static void cacheinfo_sysfs_populate(unsigned int cpu_id, diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1eab54bc6ee9..cd12f362b61f 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2147,7 +2147,11 @@ void __init set_cur_cpu_spec(struct cpu_spec *s) struct cpu_spec *t = &the_cpu_spec; t = PTRRELOC(t); - *t = *s; + /* + * use memcpy() instead of *t = *s so that GCC replaces it + * by __memcpy() when KASAN is active + */ + memcpy(t, s, sizeof(*t)); *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; } @@ -2161,8 +2165,11 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, t = PTRRELOC(t); old = *t; - /* Copy everything, then do fixups */ - *t = *s; + /* + * Copy everything, then do fixups. Use memcpy() instead of *t = *s + * so that GCC replaces it by __memcpy() when KASAN is active + */ + memcpy(t, s, sizeof(*t)); /* * If we are overriding a previous value derived from the real diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index b6fe883b1016..5ec3b3835925 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -18,6 +18,7 @@ #include <asm/dbell.h> #include <asm/irq_regs.h> #include <asm/kvm_ppc.h> +#include <asm/trace.h> #ifdef CONFIG_SMP @@ -81,6 +82,7 @@ void doorbell_exception(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); + trace_doorbell_entry(regs); ppc_msgsync(); @@ -91,6 +93,7 @@ void doorbell_exception(struct pt_regs *regs) smp_ipi_demux_relaxed(); /* already performed the barrier */ + trace_doorbell_exit(regs); irq_exit(); set_irq_regs(old_regs); } diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c new file mode 100644 index 000000000000..3482118ffe76 --- /dev/null +++ b/arch/powerpc/kernel/early_32.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Early init before relocation + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <asm/setup.h> +#include <asm/sections.h> +#include <asm/asm-prototypes.h> + +/* + * We're called here very early in the boot. + * + * Note that the kernel may be running at an address which is different + * from the address that it was linked at, so we must use RELOC/PTRRELOC + * to access static data (including strings). -- paulus + */ +notrace unsigned long __init early_init(unsigned long dt_ptr) +{ + unsigned long offset = reloc_offset(); + + /* First zero the BSS */ + memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start); + + /* + * Identify the CPU type and fix up code sections + * that depend on which cpu we have. + */ + identify_cpu(offset, mfspr(SPRN_PVR)); + + apply_feature_fixups(); + + return KERNELBASE + offset; +} diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index b61cfd29c76f..c18f3490a77e 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -36,15 +36,10 @@ #include <asm/asm-405.h> #include <asm/feature-fixups.h> #include <asm/barrier.h> +#include <asm/kup.h> +#include <asm/bug.h> -/* - * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. - */ -#if MSR_KERNEL >= 0x10000 -#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l -#else -#define LOAD_MSR_KERNEL(r, x) li r,(x) -#endif +#include "head_32.h" /* * Align to 4k in order to ensure that all functions modyfing srr0/srr1 @@ -150,8 +145,8 @@ transfer_to_handler: stw r12,_CTR(r11) stw r2,_XER(r11) mfspr r12,SPRN_SPRG_THREAD - addi r2,r12,-THREAD beq 2f /* if from user, fix up THREAD.regs */ + addi r2, r12, -THREAD addi r11,r1,STACK_FRAME_OVERHEAD stw r11,PT_REGS(r12) #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) @@ -161,6 +156,9 @@ transfer_to_handler: andis. r12,r12,DBCR0_IDM@h #endif ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_lock r11, r12 +#endif #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) beq+ 3f /* From user and task is ptraced - load up global dbcr0 */ @@ -186,6 +184,8 @@ transfer_to_handler: 2: /* if from kernel, check interrupted DOZE/NAP mode and * check for stack overflow */ + kuap_save_and_lock r11, r12, r9, r2, r0 + addi r2, r12, -THREAD lwz r9,KSP_LIMIT(r12) cmplw r1,r9 /* if r1 <= ksp_limit */ ble- stack_ovf /* then the kernel stack overflowed */ @@ -207,26 +207,43 @@ transfer_to_handler_cont: mtspr SPRN_NRI, r0 #endif #ifdef CONFIG_TRACE_IRQFLAGS + /* + * When tracing IRQ state (lockdep) we enable the MMU before we call + * the IRQ tracing functions as they might access vmalloc space or + * perform IOs for console output. + * + * To speed up the syscall path where interrupts stay on, let's check + * first if we are changing the MSR value at all. + */ + tophys(r12, r1) + lwz r12,_MSR(r12) + andi. r12,r12,MSR_EE + bne 1f + + /* MSR isn't changing, just transition directly */ +#endif + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r10 + mtlr r9 + SYNC + RFI /* jump to handler, enable MMU */ + +#ifdef CONFIG_TRACE_IRQFLAGS +1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to + * keep interrupts disabled at this point otherwise we might risk + * taking an interrupt before we tell lockdep they are enabled. + */ lis r12,reenable_mmu@h ori r12,r12,reenable_mmu@l + LOAD_MSR_KERNEL(r0, MSR_KERNEL) mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR1,r0 SYNC RFI -reenable_mmu: /* re-enable mmu so we can */ - mfmsr r10 - lwz r12,_MSR(r1) - xor r10,r10,r12 - andi. r10,r10,MSR_EE /* Did EE change? */ - beq 1f +reenable_mmu: /* - * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. - * If from user mode there is only one stack frame on the stack, and - * accessing CALLER_ADDR1 will cause oops. So we need create a dummy - * stack frame to make trace_hardirqs_off happy. - * - * This is handy because we also need to save a bunch of GPRs, + * We save a bunch of GPRs, * r3 can be different from GPR3(r1) at this point, r9 and r11 * contains the old MSR and handler address respectively, * r4 & r5 can contain page fault arguments that need to be passed @@ -234,14 +251,19 @@ reenable_mmu: /* re-enable mmu so we can */ * they aren't useful past this point (aren't syscall arguments), * the rest is restored from the exception frame. */ + stwu r1,-32(r1) stw r9,8(r1) stw r11,12(r1) stw r3,16(r1) stw r4,20(r1) stw r5,24(r1) - bl trace_hardirqs_off - lwz r5,24(r1) + + /* If we are disabling interrupts (normal case), simply log it with + * lockdep + */ +1: bl trace_hardirqs_off +2: lwz r5,24(r1) lwz r4,20(r1) lwz r3,16(r1) lwz r11,12(r1) @@ -251,15 +273,9 @@ reenable_mmu: /* re-enable mmu so we can */ lwz r6,GPR6(r1) lwz r7,GPR7(r1) lwz r8,GPR8(r1) -1: mtctr r11 + mtctr r11 mtlr r9 bctr /* jump to handler */ -#else /* CONFIG_TRACE_IRQFLAGS */ - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r10 - mtlr r9 - SYNC - RFI /* jump to handler, enable MMU */ #endif /* CONFIG_TRACE_IRQFLAGS */ #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) @@ -272,6 +288,7 @@ reenable_mmu: /* re-enable mmu so we can */ lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ + kuap_restore r11, r2, r3, r4, r5 b fast_exception_return #endif @@ -301,6 +318,33 @@ stack_ovf: SYNC RFI +#ifdef CONFIG_TRACE_IRQFLAGS +trace_syscall_entry_irq_off: + /* + * Syscall shouldn't happen while interrupts are disabled, + * so let's do a warning here. + */ +0: trap + EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING + bl trace_hardirqs_on + + /* Now enable for real */ + LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE) + mtmsr r10 + + REST_GPR(0, r1) + REST_4GPRS(3, r1) + REST_2GPRS(7, r1) + b DoSyscall +#endif /* CONFIG_TRACE_IRQFLAGS */ + + .globl transfer_to_syscall +transfer_to_syscall: +#ifdef CONFIG_TRACE_IRQFLAGS + andi. r12,r9,MSR_EE + beq- trace_syscall_entry_irq_off +#endif /* CONFIG_TRACE_IRQFLAGS */ + /* * Handle a system call. */ @@ -312,33 +356,14 @@ _GLOBAL(DoSyscall) stw r3,ORIG_GPR3(r1) li r12,0 stw r12,RESULT(r1) - lwz r11,_CCR(r1) /* Clear SO bit in CR */ - rlwinm r11,r11,0,4,2 - stw r11,_CCR(r1) #ifdef CONFIG_TRACE_IRQFLAGS - /* Return from syscalls can (and generally will) hard enable - * interrupts. You aren't supposed to call a syscall with - * interrupts disabled in the first place. However, to ensure - * that we get it right vs. lockdep if it happens, we force - * that hard enable here with appropriate tracing if we see - * that we have been called with interrupts off - */ + /* Make sure interrupts are enabled */ mfmsr r11 andi. r12,r11,MSR_EE - bne+ 1f - /* We came in with interrupts disabled, we enable them now */ - bl trace_hardirqs_on - mfmsr r11 - lwz r0,GPR0(r1) - lwz r3,GPR3(r1) - lwz r4,GPR4(r1) - ori r11,r11,MSR_EE - lwz r5,GPR5(r1) - lwz r6,GPR6(r1) - lwz r7,GPR7(r1) - lwz r8,GPR8(r1) - mtmsr r11 -1: + /* We came in with interrupts disabled, we WARN and mark them enabled + * for lockdep now */ +0: tweqi r12, 0 + EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING #endif /* CONFIG_TRACE_IRQFLAGS */ lwz r11,TI_FLAGS(r2) andi. r11,r11,_TIF_SYSCALL_DOTRACE @@ -392,8 +417,7 @@ syscall_exit_cont: lwz r8,_MSR(r1) #ifdef CONFIG_TRACE_IRQFLAGS /* If we are going to return from the syscall with interrupts - * off, we trace that here. It shouldn't happen though but we - * want to catch the bugger if it does right ? + * off, we trace that here. It shouldn't normally happen. */ andi. r10,r8,MSR_EE bne+ 1f @@ -422,12 +446,11 @@ BEGIN_FTR_SECTION lwarx r7,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) stwcx. r0,0,r1 /* to clear the reservation */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - andi. r4,r8,MSR_PR - beq 3f ACCOUNT_CPU_USER_EXIT(r2, r5, r7) -3: +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_unlock r5, r7 #endif + kuap_check r2, r4 lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -678,6 +701,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ + kuap_check r2, r4 #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task gets rescheduled on another CPU, it sees all @@ -820,6 +844,9 @@ restore_user: bnel- load_dbcr0 #endif ACCOUNT_CPU_USER_EXIT(r2, r10, r11) +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_unlock r10, r11 +#endif b restore @@ -866,12 +893,12 @@ resume_kernel: /* check current_thread_info->preempt_count */ lwz r0,TI_PREEMPT(r2) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ - bne restore + bne restore_kuap andi. r8,r8,_TIF_NEED_RESCHED - beq+ restore + beq+ restore_kuap lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? */ - beq restore /* don't schedule if so */ + beq restore_kuap /* don't schedule if so */ #ifdef CONFIG_TRACE_IRQFLAGS /* Lockdep thinks irqs are enabled, we need to call * preempt_schedule_irq with IRQs off, so we inform lockdep @@ -879,10 +906,7 @@ resume_kernel: */ bl trace_hardirqs_off #endif -1: bl preempt_schedule_irq - lwz r3,TI_FLAGS(r2) - andi. r0,r3,_TIF_NEED_RESCHED - bne- 1b + bl preempt_schedule_irq #ifdef CONFIG_TRACE_IRQFLAGS /* And now, to properly rebalance the above, we tell lockdep they * are being turned back on, which will happen when we return @@ -890,6 +914,8 @@ resume_kernel: bl trace_hardirqs_on #endif #endif /* CONFIG_PREEMPT */ +restore_kuap: + kuap_restore r1, r2, r9, r10, r0 /* interrupts are hard-disabled at this point */ restore: @@ -913,28 +939,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) * off in this assembly code while peeking at TI_FLAGS() and such. However * we need to inform it if the exception turned interrupts off, and we * are about to trun them back on. - * - * The problem here sadly is that we don't know whether the exceptions was - * one that turned interrupts off or not. So we always tell lockdep about - * turning them on here when we go back to wherever we came from with EE - * on, even if that may meen some redudant calls being tracked. Maybe later - * we could encode what the exception did somewhere or test the exception - * type in the pt_regs but that sounds overkill */ andi. r10,r9,MSR_EE beq 1f - /* - * Since the ftrace irqsoff latency trace checks CALLER_ADDR1, - * which is the stack frame here, we need to force a stack frame - * in case we came from user space. - */ stwu r1,-32(r1) mflr r0 stw r0,4(r1) - stwu r1,-32(r1) bl trace_hardirqs_on - lwz r1,0(r1) - lwz r1,0(r1) + addi r1, r1, 32 lwz r9,_MSR(r1) 1: #endif /* CONFIG_TRACE_IRQFLAGS */ @@ -1197,6 +1209,7 @@ load_dbcr0: .section .bss .align 4 + .global global_dbcr0 global_dbcr0: .space 8*NR_CPUS .previous @@ -1207,9 +1220,10 @@ do_work: /* r10 contains MSR_KERNEL here */ beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ - /* Note: We don't need to inform lockdep that we are enabling - * interrupts here. As far as it knows, they are already enabled - */ +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_on + mfmsr r10 +#endif ori r10,r10,MSR_EE SYNC MTMSRD(r10) /* hard-enable interrupts */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 15c67d2c0534..d978af78bf2a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -46,6 +46,7 @@ #include <asm/exception-64e.h> #endif #include <asm/feature-fixups.h> +#include <asm/kup.h> /* * System calls. @@ -120,6 +121,9 @@ END_BTB_FLUSH_SECTION addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ + + kuap_check_amr r10, r11 + #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) BEGIN_FW_FTR_SECTION beq 33f @@ -275,6 +279,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) + kuap_check_amr r10, r11 + #ifdef CONFIG_PPC_BOOK3S /* * Clear MSR_RI, MSR_EE is already and remains disabled. We could do @@ -296,6 +302,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r8, PACATMSCRATCH(r13) #endif + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ ld r2,GPR2(r1) ld r1,GPR1(r1) @@ -306,8 +316,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) RFI_TO_USER b . /* prevent speculative execution */ - /* exit to kernel */ -1: ld r2,GPR2(r1) +1: /* exit to kernel */ + kuap_restore_amr r2 + + ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 @@ -594,6 +606,8 @@ _GLOBAL(_switch) std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ + kuap_check_amr r9, r10 + FLUSH_COUNT_CACHE /* @@ -851,13 +865,7 @@ resume_kernel: * sure we are soft-disabled first and reconcile irq state. */ RECONCILE_IRQ_STATE(r3,r4) -1: bl preempt_schedule_irq - - /* Re-test flags and eventually loop */ - ld r9, PACA_THREAD_INFO(r13) - ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED - bne 1b + bl preempt_schedule_irq /* * arch_local_irq_restore() from preempt_schedule_irq above may @@ -942,6 +950,8 @@ fast_exception_return: ld r4,_XER(r1) mtspr SPRN_XER,r4 + kuap_check_amr r5, r6 + REST_8GPRS(5, r1) andi. r0,r3,MSR_RI @@ -974,6 +984,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -1006,6 +1020,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r0,GPR0(r1) ld r2,GPR2(r1) ld r3,GPR3(r1) + + kuap_restore_amr r4 + ld r4,GPR4(r1) ld r1,GPR1(r1) RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9481a117e242..6b86055e5251 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -19,6 +19,7 @@ #include <asm/cpuidle.h> #include <asm/head-64.h> #include <asm/feature-fixups.h> +#include <asm/kup.h> /* * There are a few constraints to be concerned with. @@ -120,7 +121,9 @@ EXC_VIRT_NONE(0x4000, 0x100) mfspr r10,SPRN_SRR1 ; \ rlwinm. r10,r10,47-31,30,31 ; \ beq- 1f ; \ - cmpwi cr3,r10,2 ; \ + cmpwi cr1,r10,2 ; \ + mfspr r3,SPRN_SRR1 ; \ + bltlr cr1 ; /* no state loss, return to idle caller */ \ BRANCH_TO_C000(r10, system_reset_idle_common) ; \ 1: \ KVMTEST_PR(n) ; \ @@ -144,8 +147,11 @@ TRAMP_KVM(PACA_EXNMI, 0x100) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) - mfspr r12,SPRN_SRR1 - b pnv_powersave_wakeup + /* + * This must be a direct branch (without linker branch stub) because + * we can not use TOC at this point as r2 may not be restored yet. + */ + b idle_return_gpr_loss #endif /* @@ -309,6 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) mfspr r11,SPRN_DSISR /* Save DSISR */ std r11,_DSISR(r1) std r9,_CCR(r1) /* Save CR in stackframe */ + kuap_save_amr_and_lock r9, r10, cr1 /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ @@ -427,17 +434,17 @@ EXC_COMMON_BEGIN(machine_check_idle_common) * Then decrement MCE nesting after finishing with the stack. */ ld r3,_MSR(r1) + ld r4,_LINK(r1) lhz r11,PACA_IN_MCE(r13) subi r11,r11,1 sth r11,PACA_IN_MCE(r13) - /* Turn off the RI bit because SRR1 is used by idle wakeup code. */ - /* Recoverability could be improved by reducing the use of SRR1. */ - li r11,0 - mtmsrd r11,1 - - b pnv_powersave_wakeup_mce + mtlr r4 + rlwinm r10,r3,47-31,30,31 + cmpwi cr1,r10,2 + bltlr cr1 /* no state loss, return to idle caller */ + b idle_return_gpr_loss #endif /* * Handle machine check early in real mode. We come here with @@ -1109,6 +1116,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ EXCEPTION_PROLOG_COMMON_1() + /* We don't touch AMR here, we never go to virtual mode */ EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 45a8d0be1c96..25f063f56ec5 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -36,6 +36,7 @@ #include <linux/sysfs.h> #include <linux/slab.h> #include <linux/cma.h> +#include <linux/hugetlb.h> #include <asm/debugfs.h> #include <asm/page.h> diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 529dcc21c3f9..cecd57e1d046 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -63,6 +63,7 @@ _GLOBAL(load_fp_state) REST_32FPVSRS(0, R4, R3) blr EXPORT_SYMBOL(load_fp_state) +_ASM_NOKPROBE_SYMBOL(load_fp_state); /* used by restore_math */ /* * Store FP state into memory, including FPSCR diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index e25b615e9f9e..755fab9641d6 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -37,6 +37,8 @@ #include <asm/export.h> #include <asm/feature-fixups.h> +#include "head_32.h" + /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ #define LOAD_BAT(n, reg, RA, RB) \ /* see the comment for clear_bats() -- Cort */ \ @@ -160,6 +162,10 @@ __after_mmu_off: bl flush_tlbs bl initial_bats + bl load_segment_registers +#ifdef CONFIG_KASAN + bl early_hash_table +#endif #if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -205,7 +211,7 @@ __after_mmu_off: */ turn_on_mmu: mfmsr r0 - ori r0,r0,MSR_DR|MSR_IR + ori r0,r0,MSR_DR|MSR_IR|MSR_RI mtspr SPRN_SRR1,r0 lis r0,start_here@h ori r0,r0,start_here@l @@ -242,103 +248,6 @@ __secondary_hold_spinloop: __secondary_hold_acknowledge: .long -1 -/* - * Exception entry code. This code runs with address translation - * turned off, i.e. using physical addresses. - * We assume sprg3 has the physical address of the current - * task's thread_struct. - */ -#define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcr r10; \ - EXCEPTION_PROLOG_1; \ - EXCEPTION_PROLOG_2 - -#define EXCEPTION_PROLOG_1 \ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ - andi. r11,r11,MSR_PR; \ - tophys(r11,r1); /* use tophys(r1) if kernel */ \ - beq 1f; \ - mfspr r11,SPRN_SPRG_THREAD; \ - lwz r11,TASK_STACK-THREAD(r11); \ - addi r11,r11,THREAD_SIZE; \ - tophys(r11,r11); \ -1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ - - -#define EXCEPTION_PROLOG_2 \ - stw r10,_CCR(r11); /* save registers */ \ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_SRR0; \ - mfspr r9,SPRN_SRR1; \ - stw r1,GPR1(r11); \ - stw r1,0(r11); \ - tovirt(r1,r11); /* set new kernel sp */ \ - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ - MTMSRD(r10); /* (except for mach check in rtas) */ \ - stw r0,GPR0(r11); \ - lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \ - addi r10,r10,STACK_FRAME_REGS_MARKER@l; \ - stw r10,8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* - * Note: code which follows this uses cr0.eq (set if from kernel), - * r11, r12 (SRR0), and r9 (SRR1). - * - * Note2: once we have set r1 we are in a position to take exceptions - * again, and we could thus set MSR:RI at that point. - */ - -/* - * Exception vectors. - */ -#define EXCEPTION(n, label, hdlr, xfer) \ - . = n; \ - DO_KVM n; \ -label: \ - EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - li r10,MSR_KERNEL; \ - copyee(r10, r9); \ - bl tfer; \ -i##n: \ - .long hdlr; \ - .long ret - -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ - ret_from_except) - /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and putting it back to what it was (unknown_exception) when done. */ @@ -387,7 +296,11 @@ DataAccess: EXCEPTION_PROLOG mfspr r10,SPRN_DSISR stw r10,_DSISR(r11) +#ifdef CONFIG_PPC_KUAP + andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h +#else andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h +#endif bne 1f /* if not, try to put a PTE */ mfspr r4,SPRN_DAR /* into the hash table */ rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ @@ -428,7 +341,7 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) + EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -449,24 +362,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) bl load_up_fpu /* if from user, just load it up */ b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) /* System call */ . = 0xc00 DO_KVM 0xc00 SystemCall: - EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + SYSCALL_ENTRY 0xc00 /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) /* * The Altivec unavailable trap is at 0x0f20. Foo. @@ -522,9 +434,9 @@ InstructionTLBMiss: andc. r1,r1,r0 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - ori r1, r1, 0xe05 /* clear out reserved bits */ - andc r1, r0, r1 /* PP = user? 2 : 0 */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + ori r1, r1, 0xe06 /* clear out reserved bits */ + andc r1, r0, r1 /* PP = user? 1 : 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -590,11 +502,11 @@ DataLoadTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ ori r1,r1,0xe04 /* clear out reserved bits */ - andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */ + andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -670,9 +582,9 @@ DataStoreTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - li r1,0xe05 /* clear out reserved bits & PP lsb */ - andc r1,r0,r1 /* PP = user? 2: 0 */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + li r1,0xe06 /* clear out reserved bits & PP msb */ + andc r1,r0,r1 /* PP = user? 1: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -698,35 +610,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #define altivec_assist_exception unknown_exception #endif - EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) - EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) + EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) - EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) . = 0x3000 @@ -738,7 +650,7 @@ AltiVecUnavailable: b fast_exception_return #endif /* CONFIG_ALTIVEC */ 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) + EXC_XFER_LITE(0xf20, altivec_unavailable_exception) PerformanceMonitor: EXCEPTION_PROLOG @@ -880,11 +792,24 @@ _ENTRY(__restore_cpu_setup) blr #endif /* !defined(CONFIG_PPC_BOOK3S_32) */ - /* * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. */ +#ifdef CONFIG_KASAN +early_hash_table: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6, early_hash - PAGE_OFFSET@h + ori r6, r6, 3 /* 256kB table */ + mtspr SPRN_SDR1, r6 + blr +#endif + load_up_mmu: sync /* Force all PTE updates to finish */ isync @@ -896,14 +821,6 @@ load_up_mmu: tophys(r6,r6) lwz r6,_SDR1@l(r6) mtspr SPRN_SDR1,r6 - li r0,16 /* load up segment register values */ - mtctr r0 /* for context 0 */ - lis r3,0x2000 /* Ku = 1, VSID = 0 */ - li r4,0 -3: mtsrin r3,r4 - addi r3,r3,0x111 /* increment VSID */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b /* Load the BAT registers with the values set up by MMU_init. MMU_init takes care of whether we're on a 601 or not. */ @@ -925,6 +842,32 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +load_segment_registers: + li r0, NUM_USER_SEGMENTS /* load up user segment register values */ + mtctr r0 /* for context 0 */ + li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ +#ifdef CONFIG_PPC_KUEP + oris r3, r3, SR_NX@h /* Set Nx */ +#endif +#ifdef CONFIG_PPC_KUAP + oris r3, r3, SR_KS@h /* Set Ks */ +#endif + li r4, 0 +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */ + mtctr r0 /* for context 0 */ + rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */ + rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */ + oris r3, r3, SR_KP@h /* Kp = 1 */ +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + blr + /* * This is where the main kernel code starts. */ @@ -950,11 +893,17 @@ start_here: * Do early platform-specific initialization, * and set up the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init bl __save_cpu_setup bl MMU_init +BEGIN_MMU_FTR_SECTION + bl MMU_init_hw_patch +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* * Go back to running unmapped so we can load up new values @@ -1006,7 +955,12 @@ _ENTRY(switch_mmu_context) blt- 4f mulli r3,r3,897 /* multiply context by skew factor */ rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ - addis r3,r3,0x6000 /* Set Ks, Ku bits */ +#ifdef CONFIG_PPC_KUEP + oris r3, r3, SR_NX@h /* Set Nx */ +#endif +#ifdef CONFIG_PPC_KUAP + oris r3, r3, SR_KS@h /* Set Ks */ +#endif li r0,NUM_USER_SEGMENTS mtctr r0 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h new file mode 100644 index 000000000000..4a692553651f --- /dev/null +++ b/arch/powerpc/kernel/head_32.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __HEAD_32_H__ +#define __HEAD_32_H__ + +#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */ + +/* + * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE. + */ +.macro __LOAD_MSR_KERNEL r, x +.if \x >= 0x8000 + lis \r, (\x)@h + ori \r, \r, (\x)@l +.else + li \r, (\x) +.endif +.endm +#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x + +/* + * Exception entry code. This code runs with address translation + * turned off, i.e. using physical addresses. + * We assume sprg3 has the physical address of the current + * task's thread_struct. + */ + +.macro EXCEPTION_PROLOG + mtspr SPRN_SPRG_SCRATCH0,r10 + mtspr SPRN_SPRG_SCRATCH1,r11 + mfcr r10 + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 +.endm + +.macro EXCEPTION_PROLOG_1 + mfspr r11,SPRN_SRR1 /* check whether user or kernel */ + andi. r11,r11,MSR_PR + tophys(r11,r1) /* use tophys(r1) if kernel */ + beq 1f + mfspr r11,SPRN_SPRG_THREAD + lwz r11,TASK_STACK-THREAD(r11) + addi r11,r11,THREAD_SIZE + tophys(r11,r11) +1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ +.endm + +.macro EXCEPTION_PROLOG_2 + stw r10,_CCR(r11) /* save registers */ + stw r12,GPR12(r11) + stw r9,GPR9(r11) + mfspr r10,SPRN_SPRG_SCRATCH0 + stw r10,GPR10(r11) + mfspr r12,SPRN_SPRG_SCRATCH1 + stw r12,GPR11(r11) + mflr r10 + stw r10,_LINK(r11) + mfspr r12,SPRN_SRR0 + mfspr r9,SPRN_SRR1 + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1,r11) /* set new kernel sp */ +#ifdef CONFIG_40x + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#else + li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */ + MTMSRD(r10) /* (except for mach check in rtas) */ +#endif + stw r0,GPR0(r11) + lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + addi r10,r10,STACK_FRAME_REGS_MARKER@l + stw r10,8(r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) +.endm + +.macro SYSCALL_ENTRY trapno + mfspr r12,SPRN_SPRG_THREAD + mfcr r10 + lwz r11,TASK_STACK-THREAD(r12) + mflr r9 + addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE + rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ + tophys(r11,r11) + stw r10,_CCR(r11) /* save registers */ + mfspr r10,SPRN_SRR0 + stw r9,_LINK(r11) + mfspr r9,SPRN_SRR1 + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1,r11) /* set new kernel sp */ + stw r10,_NIP(r11) +#ifdef CONFIG_40x + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#else + LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */ + MTMSRD(r10) /* (except for mach check in rtas) */ +#endif + lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + stw r2,GPR2(r11) + addi r10,r10,STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r11) + li r2, \trapno + 1 + stw r10,8(r11) + stw r2,_TRAP(r11) + SAVE_GPR(0, r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) + addi r11,r1,STACK_FRAME_OVERHEAD + addi r2,r12,-THREAD + stw r11,PT_REGS(r12) +#if defined(CONFIG_40x) + /* Check to see if the dbcr0 register is set up to debug. Use the + internal debug mode bit to do this. */ + lwz r12,THREAD_DBCR0(r12) + andis. r12,r12,DBCR0_IDM@h +#endif + ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) +#if defined(CONFIG_40x) + beq+ 3f + /* From user and task is ptraced - load up global dbcr0 */ + li r12,-1 /* clear all pending debug events */ + mtspr SPRN_DBSR,r12 + lis r11,global_dbcr0@ha + tophys(r11,r11) + addi r11,r11,global_dbcr0@l + lwz r12,0(r11) + mtspr SPRN_DBCR0,r12 + lwz r12,4(r11) + addi r12,r12,-1 + stw r12,4(r11) +#endif + +3: + tovirt(r2, r2) /* set r2 to current */ + lis r11, transfer_to_syscall@h + ori r11, r11, transfer_to_syscall@l +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * If MSR is changing we need to keep interrupts disabled at this point + * otherwise we might risk taking an interrupt before we tell lockdep + * they are enabled. + */ + LOAD_MSR_KERNEL(r10, MSR_KERNEL) + rlwimi r10, r9, 0, MSR_EE +#else + LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE) +#endif +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) + mtspr SPRN_NRI, r0 +#endif + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + SYNC + RFI /* jump to handler, enable MMU */ +.endm + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r11, r12 (SRR0), and r9 (SRR1). + * + * Note2: once we have set r1 we are in a position to take exceptions + * again, and we could thus set MSR:RI at that point. + */ + +/* + * Exception vectors. + */ +#ifdef CONFIG_PPC_BOOK3S +#define START_EXCEPTION(n, label) \ + . = n; \ + DO_KVM n; \ +label: + +#else +#define START_EXCEPTION(n, label) \ + . = n; \ +label: + +#endif + +#define EXCEPTION(n, label, hdlr, xfer) \ + START_EXCEPTION(n, label) \ + EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ + li r10,trap; \ + stw r10,_TRAP(r11); \ + LOAD_MSR_KERNEL(r10, msr); \ + bl tfer; \ + .long hdlr; \ + .long ret + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ + ret_from_except) + +#endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a9c934f2319b..cf54b784100d 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -44,6 +44,8 @@ #include <asm/export.h> #include <asm/asm-405.h> +#include "head_32.h" + /* As with the other PowerPC ports, it is expected that when code * execution begins here, the following registers contain valid, yet * optional, information: @@ -99,46 +101,6 @@ _ENTRY(saved_ksp_limit) .space 4 /* - * Exception vector entry code. This code runs with address translation - * turned off (i.e. using physical addresses). We assume SPRG_THREAD has - * the physical address of the current task thread_struct. - * Note that we have to have decremented r1 before we write to any fields - * of the exception frame, since a critical interrupt could occur at any - * time, and it will write to the area immediately below the current r1. - */ -#define NORMAL_EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mtspr SPRN_SPRG_SCRATCH2,r1; \ - mfcr r10; /* save CR in r10 for now */\ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ - andi. r11,r11,MSR_PR; \ - beq 1f; \ - mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\ - lwz r1,TASK_STACK-THREAD(r1); /* this thread's kernel stack */\ - addi r1,r1,THREAD_SIZE; \ -1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ - tophys(r11,r1); \ - stw r10,_CCR(r11); /* save various registers */\ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH2; \ - mfspr r12,SPRN_SRR0; \ - stw r10,GPR1(r11); \ - mfspr r9,SPRN_SRR1; \ - stw r10,0(r11); \ - rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - stw r0,GPR0(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* * Exception prolog for critical exceptions. This is a little different * from the normal exception prolog above since a critical exception * can potentially occur at any point during normal exception processing. @@ -177,6 +139,9 @@ _ENTRY(saved_ksp_limit) tovirt(r1,r11); \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ stw r0,GPR0(r11); \ + lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\ + addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ + stw r10, 8(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) @@ -196,53 +161,12 @@ _ENTRY(saved_ksp_limit) /* * Exception vectors. */ -#define START_EXCEPTION(n, label) \ - . = n; \ -label: - -#define EXCEPTION(n, label, hdlr, xfer) \ - START_EXCEPTION(n, label); \ - NORMAL_EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ CRITICAL_EXCEPTION_PROLOG; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, \ - ret_from_crit_exc) - -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - lis r10,msr@h; \ - ori r10,r10,msr@l; \ - copyee(r10, r9); \ - bl tfer; \ - .long hdlr; \ - .long ret - -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ - ret_from_except) - + crit_transfer_to_handler, ret_from_crit_exc) /* * 0x0100 - Critical Interrupt Exception @@ -393,7 +317,7 @@ label: * This is caused by a fetch from non-execute or guarded pages. */ START_EXCEPTION(0x0400, InstructionAccess) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mr r4,r12 /* Pass SRR0 as arg2 */ li r5,0 /* Pass zero as arg3 */ EXC_XFER_LITE(0x400, handle_page_fault) @@ -403,33 +327,32 @@ label: /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ stw r4,_DEAR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) + EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r4,SPRN_ESR /* Grab the ESR and save it */ stw r4,_ESR(r11) addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x700, program_check_exception) - EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) - NORMAL_EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + SYSCALL_ENTRY 0xc00 - EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ . = 0x1000 @@ -646,25 +569,25 @@ label: mfspr r10, SPRN_SPRG_SCRATCH0 b InstructionAccess - EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) #ifdef CONFIG_IBM405_ERR51 /* 405GP errata 51 */ START_EXCEPTION(0x1700, Trap_17) b DTLBMiss #else - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) #endif - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD) /* Check for a single step debug exception while in an exception * handler before state has been saved. This is to catch the case @@ -726,11 +649,11 @@ label: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_TEMPLATE(DebugException, 0x2002, \ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ Decrementer: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -738,9 +661,9 @@ Decrementer: /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ FITException: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD; - EXC_XFER_EE(0x1010, unknown_exception) + EXC_XFER_STD(0x1010, unknown_exception) /* Watchdog Timer (WDT) Exception. (from 0x1020) */ WDTException: @@ -748,15 +671,14 @@ WDTException: addi r3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), - NOCOPY, crit_transfer_to_handler, - ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) /* * The other Data TLB exceptions bail out to this point * if they can't resolve the lightweight TLB fault. */ DataAccess: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ @@ -848,6 +770,9 @@ start_here: /* * Decide what sort of machine this is and initialize the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 37117ab11584..f15fba58c744 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -203,6 +203,9 @@ _ENTRY(_start); /* * Decide what sort of machine this is and initialize the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init @@ -278,16 +281,15 @@ interrupt_base: FP_UNAVAILABLE_EXCEPTION #else EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \ - FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) + FloatingPointUnavailable, unknown_exception, EXC_XFER_STD) #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL) - EXC_XFER_EE_LITE(0x0c00, DoSyscall) + SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ - AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION @@ -295,7 +297,7 @@ interrupt_base: /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3fad8d499767..5321a11c2835 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -968,7 +968,9 @@ start_here_multiplatform: /* Restore parameters passed from prom_init/kexec */ mr r3,r31 - bl early_setup /* also sets r13 and SPRG_PACA */ + LOAD_REG_ADDR(r12, DOTSYM(early_setup)) + mtctr r12 + bctrl /* also sets r13 and SPRG_PACA */ LOAD_REG_ADDR(r3, start_here_common) ld r4,PACAKMSR(r13) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 03c73b4c6435..885be7f3d29a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -33,6 +33,8 @@ #include <asm/export.h> #include <asm/code-patching-asm.h> +#include "head_32.h" + #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ #define SIMPLE_KERNEL_ADDRESS 1 @@ -123,102 +125,6 @@ instruction_counter: .space 4 #endif -/* - * Exception entry code. This code runs with address translation - * turned off, i.e. using physical addresses. - * We assume sprg3 has the physical address of the current - * task's thread_struct. - */ -#define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0, r10; \ - mtspr SPRN_SPRG_SCRATCH1, r11; \ - mfcr r10; \ - EXCEPTION_PROLOG_1; \ - EXCEPTION_PROLOG_2 - -#define EXCEPTION_PROLOG_1 \ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ - andi. r11,r11,MSR_PR; \ - tophys(r11,r1); /* use tophys(r1) if kernel */ \ - beq 1f; \ - mfspr r11,SPRN_SPRG_THREAD; \ - lwz r11,TASK_STACK-THREAD(r11); \ - addi r11,r11,THREAD_SIZE; \ - tophys(r11,r11); \ -1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ - - -#define EXCEPTION_PROLOG_2 \ - stw r10,_CCR(r11); /* save registers */ \ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_SRR0; \ - mfspr r9,SPRN_SRR1; \ - stw r1,GPR1(r11); \ - stw r1,0(r11); \ - tovirt(r1,r11); /* set new kernel sp */ \ - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ - mtmsr r10; \ - stw r0,GPR0(r11); \ - lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \ - addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* - * Note: code which follows this uses cr0.eq (set if from kernel), - * r11, r12 (SRR0), and r9 (SRR1). - * - * Note2: once we have set r1 we are in a position to take exceptions - * again, and we could thus set MSR:RI at that point. - */ - -/* - * Exception vectors. - */ -#define EXCEPTION(n, label, hdlr, xfer) \ - . = n; \ -label: \ - EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - li r10,MSR_KERNEL; \ - copyee(r10, r9); \ - bl tfer; \ -i##n: \ - .long hdlr; \ - .long ret - -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ - ret_from_except) - /* System reset */ EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD) @@ -261,7 +167,7 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) + EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -273,19 +179,18 @@ Alignment: /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) /* System call */ . = 0xc00 SystemCall: - EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + SYSCALL_ENTRY 0xc00 /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. @@ -615,13 +520,13 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to @@ -643,7 +548,7 @@ DataBreakpoint: mfspr r4,SPRN_BAR stw r4,_DAR(r11) mfspr r5,SPRN_DSISR - EXC_XFER_EE(0x1c00, do_break) + EXC_XFER_STD(0x1c00, do_break) 11: mtcr r10 mfspr r10, SPRN_SPRG_SCRATCH0 @@ -663,10 +568,10 @@ InstructionBreakpoint: mfspr r10, SPRN_SPRG_SCRATCH0 rfi #else - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) #endif - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) . = 0x2000 @@ -853,6 +758,9 @@ start_here: /* * Decide what sort of machine this is and initialize the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 1b22a8dea399..bfeb469e8106 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -6,6 +6,8 @@ #include <asm/kvm_asm.h> #include <asm/kvm_booke_hv_asm.h> +#ifdef __ASSEMBLY__ + /* * Macros used for common Book-e exception handling */ @@ -81,6 +83,101 @@ END_BTB_FLUSH_SECTION SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) +.macro SYSCALL_ENTRY trapno intno + mfspr r10, SPRN_SPRG_THREAD +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mtspr SPRN_SPRG_WSCRATCH0, r10 + stw r11, THREAD_NORMSAVE(0)(r10) + stw r13, THREAD_NORMSAVE(2)(r10) + mfcr r13 /* save CR in r13 for now */ + mfspr r11, SPRN_SRR1 + mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ + bf 3, 1975f + b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1 +1975: + mr r12, r13 + lwz r13, THREAD_NORMSAVE(2)(r10) +FTR_SECTION_ELSE +#endif + mfcr r12 +#ifdef CONFIG_KVM_BOOKE_HV +ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) +#endif + BOOKE_CLEAR_BTB(r11) + lwz r11, TASK_STACK - THREAD(r10) + rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ + ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) + stw r12, _CCR(r11) /* save various registers */ + mflr r12 + stw r12,_LINK(r11) + mfspr r12,SPRN_SRR0 + stw r1, GPR1(r11) + mfspr r9,SPRN_SRR1 + stw r1, 0(r11) + mr r1, r11 + stw r12,_NIP(r11) + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ + lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + stw r2,GPR2(r11) + addi r12, r12, STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r11) + li r2, \trapno + 1 + stw r12, 8(r11) + stw r2,_TRAP(r11) + SAVE_GPR(0, r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) + + addi r11,r1,STACK_FRAME_OVERHEAD + addi r2,r10,-THREAD + stw r11,PT_REGS(r10) + /* Check to see if the dbcr0 register is set up to debug. Use the + internal debug mode bit to do this. */ + lwz r12,THREAD_DBCR0(r10) + andis. r12,r12,DBCR0_IDM@h + ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) + beq+ 3f + /* From user and task is ptraced - load up global dbcr0 */ + li r12,-1 /* clear all pending debug events */ + mtspr SPRN_DBSR,r12 + lis r11,global_dbcr0@ha + tophys(r11,r11) + addi r11,r11,global_dbcr0@l +#ifdef CONFIG_SMP + lwz r9,TASK_CPU(r2) + slwi r9,r9,3 + add r11,r11,r9 +#endif + lwz r12,0(r11) + mtspr SPRN_DBCR0,r12 + lwz r12,4(r11) + addi r12,r12,-1 + stw r12,4(r11) + +3: + tovirt(r2, r2) /* set r2 to current */ + lis r11, transfer_to_syscall@h + ori r11, r11, transfer_to_syscall@l +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * If MSR is changing we need to keep interrupts disabled at this point + * otherwise we might risk taking an interrupt before we tell lockdep + * they are enabled. + */ + lis r10, MSR_KERNEL@h + ori r10, r10, MSR_KERNEL@l + rlwimi r10, r9, 0, MSR_EE +#else + lis r10, (MSR_KERNEL | MSR_EE)@h + ori r10, r10, (MSR_KERNEL | MSR_EE)@l +#endif + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + SYNC + RFI /* jump to handler, enable MMU */ +.endm + /* To handle the additional exception priority levels on 40x and Book-E * processors we allocate a stack per additional priority level. * @@ -217,8 +314,7 @@ label: CRITICAL_EXCEPTION_PROLOG(intno); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, \ - ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) #define MCHECK_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(label); \ @@ -227,36 +323,23 @@ label: stw r5,_ESR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, mcheck_transfer_to_handler, \ - ret_from_mcheck_exc) + mcheck_transfer_to_handler, ret_from_mcheck_exc) -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ li r10,trap; \ stw r10,_TRAP(r11); \ lis r10,msr@h; \ ori r10,r10,msr@l; \ - copyee(r10, r9); \ bl tfer; \ .long hdlr; \ .long ret -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - #define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ ret_from_except_full) #define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ ret_from_except) /* Check for a single step debug exception while in an exception @@ -323,7 +406,7 @@ label: /* continue normal handling for a debug exception... */ \ 2: mfspr r4,SPRN_DBSR; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) + EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc) #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ @@ -376,7 +459,7 @@ label: /* continue normal handling for a critical exception... */ \ 2: mfspr r4,SPRN_DBSR; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) #define DATA_STORAGE_EXCEPTION \ START_EXCEPTION(DataStorage) \ @@ -401,7 +484,7 @@ label: mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_EE(0x0600, alignment_exception) + EXC_XFER_STD(0x0600, alignment_exception) #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ @@ -426,9 +509,9 @@ label: bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ 1: addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + EXC_XFER_STD(0x800, kernel_fp_unavailable_exception) -#ifndef __ASSEMBLY__ +#else /* __ASSEMBLY__ */ struct exception_regs { unsigned long mas0; unsigned long mas1; diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 32332e24e421..6621f230cc37 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -268,6 +268,9 @@ set_ivor: /* * Decide what sort of machine this is and initialize the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif mr r3,r30 mr r4,r31 bl machine_init @@ -380,7 +383,7 @@ interrupt_base: EXC_XFER_LITE(0x0300, handle_page_fault) 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x0300, CacheLockingException) + EXC_XFER_LITE(0x0300, CacheLockingException) /* Instruction Storage Interrupt */ INSTRUCTION_STORAGE_EXCEPTION @@ -401,21 +404,20 @@ interrupt_base: #ifdef CONFIG_E200 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - program_check_exception, EXC_XFER_EE) + program_check_exception, EXC_XFER_STD) #else EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) #endif #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG(SYSCALL) - EXC_XFER_EE_LITE(0x0c00, DoSyscall) + SYSCALL_ENTRY 0xc00 SYSCALL /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION @@ -423,7 +425,7 @@ interrupt_base: /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ EXCEPTION(0x3100, FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT @@ -633,25 +635,25 @@ END_BTB_FLUSH_SECTION bl load_up_spe b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x2010, KernelSPE) + EXC_XFER_LITE(0x2010, KernelSPE) #elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) #endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, - SPEFloatingPointException, EXC_XFER_EE) + SPEFloatingPointException, EXC_XFER_STD) /* SPE Floating Point Round */ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - SPEFloatingPointRoundException, EXC_XFER_EE) + SPEFloatingPointRoundException, EXC_XFER_STD) #elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) #endif /* CONFIG_SPE_POSSIBLE */ @@ -674,10 +676,10 @@ END_BTB_FLUSH_SECTION unknown_exception) /* Hypercall */ - EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE) + EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD) /* Embedded Hypervisor Privilege */ - EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE) + EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD) interrupt_end: diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index fec8a6773119..da307dd93ee3 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -29,11 +29,15 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/debugfs.h> +#include <linux/init.h> #include <asm/hw_breakpoint.h> #include <asm/processor.h> #include <asm/sstep.h> #include <asm/debug.h> +#include <asm/debugfs.h> +#include <asm/hvcall.h> #include <linux/uaccess.h> /* @@ -174,7 +178,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, if (!ppc_breakpoint_available()) return -ENODEV; length_max = 8; /* DABR */ - if (cpu_has_feature(CPU_FTR_DAWR)) { + if (dawr_enabled()) { length_max = 512 ; /* 64 doublewords */ /* DAWR region can't cross 512 boundary */ if ((attr->bp_addr >> 9) != @@ -376,3 +380,59 @@ void hw_breakpoint_pmu_read(struct perf_event *bp) { /* TODO */ } + +bool dawr_force_enable; +EXPORT_SYMBOL_GPL(dawr_force_enable); + +static ssize_t dawr_write_file_bool(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct arch_hw_breakpoint null_brk = {0, 0, 0}; + size_t rc; + + /* Send error to user if they hypervisor won't allow us to write DAWR */ + if ((!dawr_force_enable) && + (firmware_has_feature(FW_FEATURE_LPAR)) && + (set_dawr(&null_brk) != H_SUCCESS)) + return -1; + + rc = debugfs_write_file_bool(file, user_buf, count, ppos); + if (rc) + return rc; + + /* If we are clearing, make sure all CPUs have the DAWR cleared */ + if (!dawr_force_enable) + smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0); + + return rc; +} + +static const struct file_operations dawr_enable_fops = { + .read = debugfs_read_file_bool, + .write = dawr_write_file_bool, + .open = simple_open, + .llseek = default_llseek, +}; + +static int __init dawr_force_setup(void) +{ + dawr_force_enable = false; + + if (cpu_has_feature(CPU_FTR_DAWR)) { + /* Don't setup sysfs file for user control on P8 */ + dawr_force_enable = true; + return 0; + } + + if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) { + /* Turn DAWR off by default, but allow admin to turn it on */ + dawr_force_enable = false; + debugfs_create_file_unsafe("dawr_enable_dangerous", 0600, + powerpc_debugfs_root, + &dawr_force_enable, + &dawr_enable_fops); + } + return 0; +} +arch_initcall(dawr_force_setup); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 7f5ac2e8581b..2dfbd5d5b932 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -1,956 +1,188 @@ /* - * This file contains idle entry/exit functions for POWER7, - * POWER8 and POWER9 CPUs. + * Copyright 2018, IBM Corporation. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. + * + * This file contains general idle entry/exit functions to save + * and restore stack and NVGPRs which allows C code to call idle + * states that lose GPRs, and it will return transparently with + * SRR1 wakeup reason return value. + * + * The platform / CPU caller must ensure SPRs and any other non-GPR + * state is saved and restored correctly, handle KVM, interrupts, etc. */ -#include <linux/threads.h> -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/cputable.h> -#include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ppc-opcode.h> -#include <asm/hw_irq.h> -#include <asm/kvm_book3s_asm.h> -#include <asm/opal.h> #include <asm/cpuidle.h> -#include <asm/exception-64s.h> -#include <asm/book3s/64/mmu-hash.h> -#include <asm/mmu.h> -#include <asm/asm-compat.h> -#include <asm/feature-fixups.h> - -#undef DEBUG - -/* - * Use unused space in the interrupt stack to save and restore - * registers for winkle support. - */ -#define _MMCR0 GPR0 -#define _SDR1 GPR3 -#define _PTCR GPR3 -#define _RPR GPR4 -#define _SPURR GPR5 -#define _PURR GPR6 -#define _TSCR GPR7 -#define _DSCR GPR8 -#define _AMOR GPR9 -#define _WORT GPR10 -#define _WORC GPR11 -#define _LPCR GPR12 - -#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16 - .text - -/* - * Used by threads before entering deep idle states. Saves SPRs - * in interrupt stack frame - */ -save_sprs_to_stack: - /* - * Note all register i.e per-core, per-subcore or per-thread is saved - * here since any thread in the core might wake up first - */ -BEGIN_FTR_SECTION - /* - * Note - SDR1 is dropped in Power ISA v3. Hence not restoring - * SDR1 here - */ - mfspr r3,SPRN_PTCR - std r3,_PTCR(r1) - mfspr r3,SPRN_LPCR - std r3,_LPCR(r1) -FTR_SECTION_ELSE - mfspr r3,SPRN_SDR1 - std r3,_SDR1(r1) -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) - mfspr r3,SPRN_RPR - std r3,_RPR(r1) - mfspr r3,SPRN_SPURR - std r3,_SPURR(r1) - mfspr r3,SPRN_PURR - std r3,_PURR(r1) - mfspr r3,SPRN_TSCR - std r3,_TSCR(r1) - mfspr r3,SPRN_DSCR - std r3,_DSCR(r1) - mfspr r3,SPRN_AMOR - std r3,_AMOR(r1) - mfspr r3,SPRN_WORT - std r3,_WORT(r1) - mfspr r3,SPRN_WORC - std r3,_WORC(r1) /* - * On POWER9, there are idle states such as stop4, invoked via cpuidle, - * that lose hypervisor resources. In such cases, we need to save - * additional SPRs before entering those idle states so that they can - * be restored to their older values on wakeup from the idle state. + * Desired PSSCR in r3 * - * On POWER8, the only such deep idle state is winkle which is used - * only in the context of CPU-Hotplug, where these additional SPRs are - * reinitiazed to a sane value. Hence there is no need to save/restore - * these SPRs. + * No state will be lost regardless of wakeup mechanism (interrupt or NIA). + * + * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can + * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR, + * and must blr, to return to caller with r3 set according to caller's expected + * return code (for Book3S/64 that is SRR1). */ -BEGIN_FTR_SECTION - blr -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - -power9_save_additional_sprs: - mfspr r3, SPRN_PID - mfspr r4, SPRN_LDBAR - std r3, STOP_PID(r13) - std r4, STOP_LDBAR(r13) - - mfspr r3, SPRN_FSCR - mfspr r4, SPRN_HFSCR - std r3, STOP_FSCR(r13) - std r4, STOP_HFSCR(r13) - - mfspr r3, SPRN_MMCRA - mfspr r4, SPRN_MMCR0 - std r3, STOP_MMCRA(r13) - std r4, _MMCR0(r1) - - mfspr r3, SPRN_MMCR1 - mfspr r4, SPRN_MMCR2 - std r3, STOP_MMCR1(r13) - std r4, STOP_MMCR2(r13) - blr - -power9_restore_additional_sprs: - ld r3,_LPCR(r1) - ld r4, STOP_PID(r13) - mtspr SPRN_LPCR,r3 - mtspr SPRN_PID, r4 - - ld r3, STOP_LDBAR(r13) - ld r4, STOP_FSCR(r13) - mtspr SPRN_LDBAR, r3 - mtspr SPRN_FSCR, r4 - - ld r3, STOP_HFSCR(r13) - ld r4, STOP_MMCRA(r13) - mtspr SPRN_HFSCR, r3 - mtspr SPRN_MMCRA, r4 - - ld r3, _MMCR0(r1) - ld r4, STOP_MMCR1(r13) - mtspr SPRN_MMCR0, r3 - mtspr SPRN_MMCR1, r4 - - ld r3, STOP_MMCR2(r13) - ld r4, PACA_SPRG_VDSO(r13) - mtspr SPRN_MMCR2, r3 - mtspr SPRN_SPRG3, r4 +_GLOBAL(isa300_idle_stop_noloss) + mtspr SPRN_PSSCR,r3 + PPC_STOP + li r3,0 blr /* - * Used by threads when the lock bit of core_idle_state is set. - * Threads will spin in HMT_LOW until the lock bit is cleared. - * r14 - pointer to core_idle_state - * r15 - used to load contents of core_idle_state - * r9 - used as a temporary variable + * Desired PSSCR in r3 + * + * GPRs may be lost, so they are saved here. Wakeup is by interrupt only. + * The SRESET wakeup returns to this function's caller by calling + * idle_return_gpr_loss with r3 set to desired return value. + * + * A wakeup without GPR loss may alteratively be handled as in + * isa300_idle_stop_noloss and blr directly, as an optimisation. + * + * The caller is responsible for saving/restoring SPRs, MSR, timebase, + * etc. */ - -core_idle_lock_held: - HMT_LOW -3: lwz r15,0(r14) - andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - bne 3b - HMT_MEDIUM - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bne- core_idle_lock_held - blr +_GLOBAL(isa300_idle_stop_mayloss) + mtspr SPRN_PSSCR,r3 + std r1,PACAR1(r13) + mflr r4 + mfcr r5 + /* use stack red zone rather than a new frame for saving regs */ + std r2,-8*0(r1) + std r14,-8*1(r1) + std r15,-8*2(r1) + std r16,-8*3(r1) + std r17,-8*4(r1) + std r18,-8*5(r1) + std r19,-8*6(r1) + std r20,-8*7(r1) + std r21,-8*8(r1) + std r22,-8*9(r1) + std r23,-8*10(r1) + std r24,-8*11(r1) + std r25,-8*12(r1) + std r26,-8*13(r1) + std r27,-8*14(r1) + std r28,-8*15(r1) + std r29,-8*16(r1) + std r30,-8*17(r1) + std r31,-8*18(r1) + std r4,-8*19(r1) + std r5,-8*20(r1) + /* 168 bytes */ + PPC_STOP + b . /* catch bugs */ /* - * Pass requested state in r3: - * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 - * - Requested PSSCR value in POWER9 + * Desired return value in r3 + * + * The idle wakeup SRESET interrupt can call this after calling + * to return to the idle sleep function caller with r3 as the return code. * - * Address of idle handler to branch to in realmode in r4 + * This must not be used if idle was entered via a _noloss function (use + * a simple blr instead). */ -pnv_powersave_common: - /* Use r3 to pass state nap/sleep/winkle */ - /* NAP is a state loss, we create a regs frame on the - * stack, fill it up with the state we care about and - * stick a pointer to it in PACAR1. We really only - * need to save PC, some CR bits and the NV GPRs, - * but for now an interrupt frame will do. - */ - mtctr r4 - - mflr r0 - std r0,16(r1) - stdu r1,-INT_FRAME_SIZE(r1) - std r0,_LINK(r1) - std r0,_NIP(r1) - - /* We haven't lost state ... yet */ - li r0,0 - stb r0,PACA_NAPSTATELOST(r13) - - /* Continue saving state */ - SAVE_GPR(2, r1) - SAVE_NVGPRS(r1) - mfcr r5 - std r5,_CCR(r1) - std r1,PACAR1(r13) - -BEGIN_FTR_SECTION - /* - * POWER9 does not require real mode to stop, and presently does not - * set hwthread_state for KVM (threads don't share MMU context), so - * we can remain in virtual mode for this. - */ - bctr -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - /* - * POWER8 - * Go to real mode to do the nap, as required by the architecture. - * Also, we need to be in real mode before setting hwthread_state, - * because as soon as we do that, another thread can switch - * the MMU context to the guest. - */ - LOAD_REG_IMMEDIATE(r7, MSR_IDLE) - mtmsrd r7,0 - bctr +_GLOBAL(idle_return_gpr_loss) + ld r1,PACAR1(r13) + ld r4,-8*19(r1) + ld r5,-8*20(r1) + mtlr r4 + mtcr r5 + /* + * KVM nap requires r2 to be saved, rather than just restoring it + * from PACATOC. This could be avoided for that less common case + * if KVM saved its r2. + */ + ld r2,-8*0(r1) + ld r14,-8*1(r1) + ld r15,-8*2(r1) + ld r16,-8*3(r1) + ld r17,-8*4(r1) + ld r18,-8*5(r1) + ld r19,-8*6(r1) + ld r20,-8*7(r1) + ld r21,-8*8(r1) + ld r22,-8*9(r1) + ld r23,-8*10(r1) + ld r24,-8*11(r1) + ld r25,-8*12(r1) + ld r26,-8*13(r1) + ld r27,-8*14(r1) + ld r28,-8*15(r1) + ld r29,-8*16(r1) + ld r30,-8*17(r1) + ld r31,-8*18(r1) + blr /* * This is the sequence required to execute idle instructions, as * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0. + * + * The 0(r1) slot is used to save r2 in isa206, so use that here. */ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ + std r2,0(r1); \ ptesync; \ - ld r0,0(r1); \ -236: cmpd cr0,r0,r0; \ + ld r2,0(r1); \ +236: cmpd cr0,r2,r2; \ bne 236b; \ - IDLE_INST; - - - .globl pnv_enter_arch207_idle_mode -pnv_enter_arch207_idle_mode: -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're entering idle */ - li r4,KVM_HWTHREAD_IN_IDLE - /******************************************************/ - /* N O T E W E L L ! ! ! N O T E W E L L */ - /* The following store to HSTATE_HWTHREAD_STATE(r13) */ - /* MUST occur in real mode, i.e. with the MMU off, */ - /* and the MMU must stay off until we clear this flag */ - /* and test HSTATE_HWTHREAD_REQ(r13) in */ - /* pnv_powersave_wakeup in this file. */ - /* The reason is that another thread can switch the */ - /* MMU to a guest context whenever this flag is set */ - /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ - /* that would potentially cause this thread to start */ - /* executing instructions from guest memory in */ - /* hypervisor mode, leading to a host crash or data */ - /* corruption, or worse. */ - /******************************************************/ - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - stb r3,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr3,r3,PNV_THREAD_SLEEP - bge cr3,2f - IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) - /* No return */ -2: - /* Sleep or winkle */ - lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) - li r5,0 - beq cr3,3f - lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h -3: -lwarx_loop1: - lwarx r15,0,r14 - - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bnel- core_idle_lock_held - - add r15,r15,r5 /* Add if winkle */ - andc r15,r15,r7 /* Clear thread bit */ - - andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS - -/* - * If cr0 = 0, then current thread is the last thread of the core entering - * sleep. Last thread needs to execute the hardware bug workaround code if - * required by the platform. - * Make the workaround call unconditionally here. The below branch call is - * patched out when the idle states are discovered if the platform does not - * require it. - */ -.global pnv_fastsleep_workaround_at_entry -pnv_fastsleep_workaround_at_entry: - beq fastsleep_workaround_at_entry - - stwcx. r15,0,r14 - bne- lwarx_loop1 - isync - -common_enter: /* common code for all the threads entering sleep or winkle */ - bgt cr3,enter_winkle - IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) - -fastsleep_workaround_at_entry: - oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - stwcx. r15,0,r14 - bne- lwarx_loop1 - isync - - /* Fast sleep workaround */ - li r3,1 - li r4,1 - bl opal_config_cpu_idle_state - - /* Unlock */ - xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - lwsync - stw r15,0(r14) - b common_enter - -enter_winkle: - bl save_sprs_to_stack - - IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) - -/* - * r3 - PSSCR value corresponding to the requested stop state. - */ -power_enter_stop: -/* - * Check if we are executing the lite variant with ESL=EC=0 - */ - andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED - clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne .Lhandle_esl_ec_set - PPC_STOP - li r3,0 /* Since we didn't lose state, return 0 */ - std r3, PACA_REQ_PSSCR(r13) - - /* - * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so - * it can determine if the wakeup reason is an HMI in - * CHECK_HMI_INTERRUPT. - * - * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup - * reason, so there is no point setting r12 to SRR1. - * - * Further, we clear r12 here, so that we don't accidentally enter the - * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI. - */ - li r12, 0 - b pnv_wakeup_noloss - -.Lhandle_esl_ec_set: -BEGIN_FTR_SECTION - /* - * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after - * a state-loss idle. Saving and restoring MMCR0 over idle is a - * workaround. - */ - mfspr r4,SPRN_MMCR0 - std r4,_MMCR0(r1) -END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1) + IDLE_INST; \ + b . /* catch bugs */ /* - * Check if the requested state is a deep idle state. - */ - LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) - ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) - cmpd r3,r4 - bge .Lhandle_deep_stop - PPC_STOP /* Does not return (system reset interrupt) */ - -.Lhandle_deep_stop: -/* - * Entering deep idle state. - * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to - * stack and enter stop - */ - lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) - -lwarx_loop_stop: - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bnel- core_idle_lock_held - andc r15,r15,r7 /* Clear thread bit */ - - stwcx. r15,0,r14 - bne- lwarx_loop_stop - isync - - bl save_sprs_to_stack - - PPC_STOP /* Does not return (system reset interrupt) */ - -/* - * Entered with MSR[EE]=0 and no soft-masked interrupts pending. - * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE). - */ -_GLOBAL(power7_idle_insn) - /* Now check if user or arch enabled NAP mode */ - LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode) - b pnv_powersave_common - -#define CHECK_HMI_INTERRUPT \ -BEGIN_FTR_SECTION_NESTED(66); \ - rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \ -FTR_SECTION_ELSE_NESTED(66); \ - rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \ -ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ - cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ - bne+ 20f; \ - /* Invoke opal call to handle hmi */ \ - ld r2,PACATOC(r13); \ - ld r1,PACAR1(r13); \ - std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r3,0; /* NULL argument */ \ - bl hmi_exception_realmode; \ - nop; \ - ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ -20: nop; - -/* - * Entered with MSR[EE]=0 and no soft-masked interrupts pending. - * r3 contains desired PSSCR register value. + * Desired instruction type in r3 * - * Offline (CPU unplug) case also must notify KVM that the CPU is - * idle. - */ -_GLOBAL(power9_offline_stop) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* - * Tell KVM we're entering idle. - * This does not have to be done in real mode because the P9 MMU - * is independent per-thread. Some steppings share radix/hash mode - * between threads, but in that case KVM has a barrier sync in real - * mode before and after switching between radix and hash. - */ - li r4,KVM_HWTHREAD_IN_IDLE - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - /* fall through */ - -_GLOBAL(power9_idle_stop) - std r3, PACA_REQ_PSSCR(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -BEGIN_FTR_SECTION - sync - lwz r5, PACA_DONT_STOP(r13) - cmpwi r5, 0 - bne 1f -END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) -#endif - mtspr SPRN_PSSCR,r3 - LOAD_REG_ADDR(r4,power_enter_stop) - b pnv_powersave_common - /* No return */ -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -1: - /* - * We get here when TM / thread reconfiguration bug workaround - * code wants to get the CPU into SMT4 mode, and therefore - * we are being asked not to stop. - */ - li r3, 0 - std r3, PACA_REQ_PSSCR(r13) - blr /* return 0 for wakeup cause / SRR1 value */ -#endif - -/* - * Called from machine check handler for powersave wakeups. - * Low level machine check processing has already been done. Now just - * go through the wake up path to get everything in order. + * GPRs may be lost, so they are saved here. Wakeup is by interrupt only. + * The SRESET wakeup returns to this function's caller by calling + * idle_return_gpr_loss with r3 set to desired return value. * - * r3 - The original SRR1 value. - * Original SRR[01] have been clobbered. - * MSR_RI is clear. - */ -.global pnv_powersave_wakeup_mce -pnv_powersave_wakeup_mce: - /* Set cr3 for pnv_powersave_wakeup */ - rlwinm r11,r3,47-31,30,31 - cmpwi cr3,r11,2 - - /* - * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake - * reason into r12, which allows reuse of the system reset wakeup - * code without being mistaken for another type of wakeup. - */ - oris r12,r3,SRR1_WAKEMCE_RESVD@h - - b pnv_powersave_wakeup - -/* - * Called from reset vector for powersave wakeups. - * cr3 - set to gt if waking up with partial/complete hypervisor state loss - * r12 - SRR1 - */ -.global pnv_powersave_wakeup -pnv_powersave_wakeup: - ld r2, PACATOC(r13) - -BEGIN_FTR_SECTION - bl pnv_restore_hyp_resource_arch300 -FTR_SECTION_ELSE - bl pnv_restore_hyp_resource_arch207 -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) - - li r0,PNV_THREAD_RUNNING - stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ - - mr r3,r12 - -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - lbz r0,HSTATE_HWTHREAD_STATE(r13) - cmpwi r0,KVM_HWTHREAD_IN_KERNEL - beq 0f - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. testing hwthread_req */ - sync -0: lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 1f - b kvm_start_guest -1: -#endif - - /* Return SRR1 from power7_nap() */ - blt cr3,pnv_wakeup_noloss - b pnv_wakeup_loss - -/* - * Check whether we have woken up with hypervisor state loss. - * If yes, restore hypervisor state and return back to link. + * A wakeup without GPR loss may alteratively be handled as in + * isa300_idle_stop_noloss and blr directly, as an optimisation. * - * cr3 - set to gt if waking up with partial/complete hypervisor state loss - */ -pnv_restore_hyp_resource_arch300: - /* - * Workaround for POWER9, if we lost resources, the ERAT - * might have been mixed up and needs flushing. We also need - * to reload MMCR0 (see comment above). We also need to set - * then clear bit 60 in MMCRA to ensure the PMU starts running. - */ - blt cr3,1f -BEGIN_FTR_SECTION - PPC_INVALIDATE_ERAT - ld r1,PACAR1(r13) - ld r4,_MMCR0(r1) - mtspr SPRN_MMCR0,r4 -END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1) - mfspr r4,SPRN_MMCRA - ori r4,r4,(1 << (63-60)) - mtspr SPRN_MMCRA,r4 - xori r4,r4,(1 << (63-60)) - mtspr SPRN_MMCRA,r4 -1: - /* - * POWER ISA 3. Use PSSCR to determine if we - * are waking up from deep idle state - */ - LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) - ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) - - /* - * 0-3 bits correspond to Power-Saving Level Status - * which indicates the idle state we are waking up from - */ - mfspr r5, SPRN_PSSCR - rldicl r5,r5,4,60 - li r0, 0 /* clear requested_psscr to say we're awake */ - std r0, PACA_REQ_PSSCR(r13) - cmpd cr4,r5,r4 - bge cr4,pnv_wakeup_tb_loss /* returns to caller */ - - blr /* Waking up without hypervisor state loss. */ - -/* Same calling convention as arch300 */ -pnv_restore_hyp_resource_arch207: - /* - * POWER ISA 2.07 or less. - * Check if we slept with sleep or winkle. - */ - lbz r4,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r4,PNV_THREAD_NAP - bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ - - /* - * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking - * up from nap. At this stage CR3 shouldn't contains 'gt' since that - * indicates we are waking with hypervisor state loss from nap. - */ - bgt cr3,. - - blr /* Waking up without hypervisor state loss */ - -/* - * Called if waking up from idle state which can cause either partial or - * complete hyp state loss. - * In POWER8, called if waking up from fastsleep or winkle - * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state - * - * r13 - PACA - * cr3 - gt if waking up with partial/complete hypervisor state loss - * - * If ISA300: - * cr4 - gt or eq if waking up from complete hypervisor state loss. + * The caller is responsible for saving/restoring SPRs, MSR, timebase, + * etc. * - * If ISA207: - * r4 - PACA_THREAD_IDLE_STATE + * This must be called in real-mode (MSR_IDLE). */ -pnv_wakeup_tb_loss: - ld r1,PACAR1(r13) - /* - * Before entering any idle state, the NVGPRs are saved in the stack. - * If there was a state loss, or PACA_NAPSTATELOST was set, then the - * NVGPRs are restored. If we are here, it is likely that state is lost, - * but not guaranteed -- neither ISA207 nor ISA300 tests to reach - * here are the same as the test to restore NVGPRS: - * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300, - * and SRR1 test for restoring NVGPRs. - * - * We are about to clobber NVGPRs now, so set NAPSTATELOST to - * guarantee they will always be restored. This might be tightened - * with careful reading of specs (particularly for ISA300) but this - * is already a slow wakeup path and it's simpler to be safe. - */ - li r0,1 - stb r0,PACA_NAPSTATELOST(r13) - - /* - * - * Save SRR1 and LR in NVGPRs as they might be clobbered in - * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required - * to determine the wakeup reason if we branch to kvm_start_guest. LR - * is required to return back to reset vector after hypervisor state - * restore is complete. - */ - mr r19,r12 - mr r18,r4 - mflr r17 -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) - lbz r7,PACA_THREAD_MASK(r13) - - /* - * Take the core lock to synchronize against other threads. - * - * Lock bit is set in one of the 2 cases- - * a. In the sleep/winkle enter path, the last thread is executing - * fastsleep workaround code. - * b. In the wake up path, another thread is executing fastsleep - * workaround undo code or resyncing timebase or restoring context - * In either case loop until the lock bit is cleared. - */ -1: - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bnel- core_idle_lock_held - oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - stwcx. r15,0,r14 - bne- 1b - isync - - andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS - cmpwi cr2,r9,0 - - /* - * At this stage - * cr2 - eq if first thread to wakeup in core - * cr3- gt if waking up with partial/complete hypervisor state loss - * ISA300: - * cr4 - gt or eq if waking up from complete hypervisor state loss. - */ - -BEGIN_FTR_SECTION - /* - * Were we in winkle? - * If yes, check if all threads were in winkle, decrement our - * winkle count, set all thread winkle bits if all were in winkle. - * Check if our thread has a winkle bit set, and set cr4 accordingly - * (to match ISA300, above). Pseudo-code for core idle state - * transitions for ISA207 is as follows (everything happens atomically - * due to store conditional and/or lock bit): - * - * nap_idle() { } - * nap_wake() { } - * - * sleep_idle() - * { - * core_idle_state &= ~thread_in_core - * } - * - * sleep_wake() - * { - * bool first_in_core, first_in_subcore; - * - * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0; - * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0; - * - * core_idle_state |= thread_in_core; - * } - * - * winkle_idle() - * { - * core_idle_state &= ~thread_in_core; - * core_idle_state += 1 << WINKLE_COUNT_SHIFT; - * } - * - * winkle_wake() - * { - * bool first_in_core, first_in_subcore, winkle_state_lost; - * - * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0; - * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0; - * - * core_idle_state |= thread_in_core; - * - * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT)) - * core_idle_state |= THREAD_WINKLE_BITS; - * core_idle_state -= 1 << WINKLE_COUNT_SHIFT; - * - * winkle_state_lost = core_idle_state & - * (thread_in_core << WINKLE_THREAD_SHIFT); - * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT); - * } - * - */ - cmpwi r18,PNV_THREAD_WINKLE +_GLOBAL(isa206_idle_insn_mayloss) + std r1,PACAR1(r13) + mflr r4 + mfcr r5 + /* use stack red zone rather than a new frame for saving regs */ + std r2,-8*0(r1) + std r14,-8*1(r1) + std r15,-8*2(r1) + std r16,-8*3(r1) + std r17,-8*4(r1) + std r18,-8*5(r1) + std r19,-8*6(r1) + std r20,-8*7(r1) + std r21,-8*8(r1) + std r22,-8*9(r1) + std r23,-8*10(r1) + std r24,-8*11(r1) + std r25,-8*12(r1) + std r26,-8*13(r1) + std r27,-8*14(r1) + std r28,-8*15(r1) + std r29,-8*16(r1) + std r30,-8*17(r1) + std r31,-8*18(r1) + std r4,-8*19(r1) + std r5,-8*20(r1) + cmpwi r3,PNV_THREAD_NAP + bne 1f + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) +1: cmpwi r3,PNV_THREAD_SLEEP bne 2f - andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h - subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h - beq 2f - ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */ -2: - /* Shift thread bit to winkle mask, then test if this thread is set, - * and remove it from the winkle bits */ - slwi r8,r7,8 - and r8,r8,r15 - andc r15,r15,r8 - cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */ - - lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) - and r4,r4,r15 - cmpwi r4,0 /* Check if first in subcore */ - - or r15,r15,r7 /* Set thread bit */ - beq first_thread_in_subcore -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - - or r15,r15,r7 /* Set thread bit */ - beq cr2,first_thread_in_core - - /* Not first thread in core or subcore to wake up */ - b clear_lock - -first_thread_in_subcore: - /* - * If waking up from sleep, subcore state is not lost. Hence - * skip subcore state restore - */ - blt cr4,subcore_state_restored - - /* Restore per-subcore state */ - ld r4,_SDR1(r1) - mtspr SPRN_SDR1,r4 - - ld r4,_RPR(r1) - mtspr SPRN_RPR,r4 - ld r4,_AMOR(r1) - mtspr SPRN_AMOR,r4 - -subcore_state_restored: - /* - * Check if the thread is also the first thread in the core. If not, - * skip to clear_lock. - */ - bne cr2,clear_lock - -first_thread_in_core: - - /* - * First thread in the core waking up from any state which can cause - * partial or complete hypervisor state loss. It needs to - * call the fastsleep workaround code if the platform requires it. - * Call it unconditionally here. The below branch instruction will - * be patched out if the platform does not have fastsleep or does not - * require the workaround. Patching will be performed during the - * discovery of idle-states. - */ -.global pnv_fastsleep_workaround_at_exit -pnv_fastsleep_workaround_at_exit: - b fastsleep_workaround_at_exit - -timebase_resync: - /* - * Use cr3 which indicates that we are waking up with atleast partial - * hypervisor state loss to determine if TIMEBASE RESYNC is needed. - */ - ble cr3,.Ltb_resynced - /* Time base re-sync */ - bl opal_resync_timebase; - /* - * If waking up from sleep (POWER8), per core state - * is not lost, skip to clear_lock. - */ -.Ltb_resynced: - blt cr4,clear_lock - - /* - * First thread in the core to wake up and its waking up with - * complete hypervisor state loss. Restore per core hypervisor - * state. - */ -BEGIN_FTR_SECTION - ld r4,_PTCR(r1) - mtspr SPRN_PTCR,r4 - ld r4,_RPR(r1) - mtspr SPRN_RPR,r4 - ld r4,_AMOR(r1) - mtspr SPRN_AMOR,r4 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - - ld r4,_TSCR(r1) - mtspr SPRN_TSCR,r4 - ld r4,_WORC(r1) - mtspr SPRN_WORC,r4 - -clear_lock: - xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - lwsync - stw r15,0(r14) - -common_exit: - /* - * Common to all threads. - * - * If waking up from sleep, hypervisor state is not lost. Hence - * skip hypervisor state restore. - */ - blt cr4,hypervisor_state_restored - - /* Waking up from winkle */ - -BEGIN_MMU_FTR_SECTION - b no_segments -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) - /* Restore SLB from PACA */ - ld r8,PACA_SLBSHADOWPTR(r13) - - .rept SLB_NUM_BOLTED - li r3, SLBSHADOW_SAVEAREA - LDX_BE r5, r8, r3 - addi r3, r3, 8 - LDX_BE r6, r8, r3 - andis. r7,r5,SLB_ESID_V@h - beq 1f - slbmte r6,r5 -1: addi r8,r8,16 - .endr -no_segments: - - /* Restore per thread state */ - - ld r4,_SPURR(r1) - mtspr SPRN_SPURR,r4 - ld r4,_PURR(r1) - mtspr SPRN_PURR,r4 - ld r4,_DSCR(r1) - mtspr SPRN_DSCR,r4 - ld r4,_WORT(r1) - mtspr SPRN_WORT,r4 - - /* Call cur_cpu_spec->cpu_restore() */ - LOAD_REG_ADDR(r4, cur_cpu_spec) - ld r4,0(r4) - ld r12,CPU_SPEC_RESTORE(r4) -#ifdef PPC64_ELF_ABI_v1 - ld r12,0(r12) -#endif - mtctr r12 - bctrl - -/* - * On POWER9, we can come here on wakeup from a cpuidle stop state. - * Hence restore the additional SPRs to the saved value. - * - * On POWER8, we come here only on winkle. Since winkle is used - * only in the case of CPU-Hotplug, we don't need to restore - * the additional SPRs. - */ -BEGIN_FTR_SECTION - bl power9_restore_additional_sprs -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) -hypervisor_state_restored: - - mr r12,r19 - mtlr r17 - blr /* return to pnv_powersave_wakeup */ - -fastsleep_workaround_at_exit: - li r3,1 - li r4,0 - bl opal_config_cpu_idle_state - b timebase_resync - -/* - * R3 here contains the value that will be returned to the caller - * of power7_nap. - * R12 contains SRR1 for CHECK_HMI_INTERRUPT. - */ -.global pnv_wakeup_loss -pnv_wakeup_loss: - ld r1,PACAR1(r13) -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - REST_NVGPRS(r1) - REST_GPR(2, r1) - ld r4,PACAKMSR(r13) - ld r5,_LINK(r1) - ld r6,_CCR(r1) - addi r1,r1,INT_FRAME_SIZE - mtlr r5 - mtcr r6 - mtmsrd r4 - blr + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) +2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) -/* - * R3 here contains the value that will be returned to the caller - * of power7_nap. - * R12 contains SRR1 for CHECK_HMI_INTERRUPT. - */ -pnv_wakeup_noloss: - lbz r0,PACA_NAPSTATELOST(r13) - cmpwi r0,0 - bne pnv_wakeup_loss - ld r1,PACAR1(r13) -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - ld r4,PACAKMSR(r13) - ld r5,_NIP(r1) - ld r6,_CCR(r1) - addi r1,r1,INT_FRAME_SIZE - mtlr r5 - mtcr r6 - mtmsrd r4 - blr diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8a936723c791..ada901af4950 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -81,10 +81,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); -int __irq_offset_value; - #ifdef CONFIG_PPC32 -EXPORT_SYMBOL(__irq_offset_value); atomic_t ppc_n_lost_interrupts; #ifdef CONFIG_TAU_INT @@ -261,16 +258,9 @@ notrace void arch_local_irq_restore(unsigned long mask) */ irq_happened = get_irq_happened(); if (!irq_happened) { - /* - * FIXME. Here we'd like to be able to do: - * - * #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - * WARN_ON(!(mfmsr() & MSR_EE)); - * #endif - * - * But currently it hits in a few paths, we should fix those and - * enable the warning. - */ +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(!(mfmsr() & MSR_EE)); +#endif return; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index b5fec1f9751a..4581377cfc98 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -112,6 +112,7 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->srr1 = regs->msr; mce->gpr3 = regs->gpr[3]; mce->in_use = 1; + mce->cpu = get_paca()->paca_index; /* Mark it recovered if we have handled it and MSR(RI=1). */ if (handled && (regs->msr & MSR_RI)) @@ -121,6 +122,8 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->initiator = mce_err->initiator; mce->severity = mce_err->severity; + mce->sync_error = mce_err->sync_error; + mce->error_class = mce_err->error_class; /* * Populate the mce error_type and type-specific error_type. @@ -310,7 +313,11 @@ static void machine_check_process_queued_event(struct irq_work *work) void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest) { - const char *level, *sevstr, *subtype; + const char *level, *sevstr, *subtype, *err_type; + uint64_t ea = 0, pa = 0; + int n = 0; + char dar_str[50]; + char pa_str[50]; static const char *mc_ue_types[] = { "Indeterminate", "Instruction fetch", @@ -357,6 +364,13 @@ void machine_check_print_event_info(struct machine_check_event *evt, "Store (timeout)", "Page table walk Load/Store (timeout)", }; + static const char *mc_error_class[] = { + "Unknown", + "Hardware error", + "Probable Hardware error (some chance of software cause)", + "Software error", + "Probable Software error (some chance of hardware cause)", + }; /* Print things out */ if (evt->version != MCE_V1) { @@ -371,9 +385,9 @@ void machine_check_print_event_info(struct machine_check_event *evt, break; case MCE_SEV_WARNING: level = KERN_WARNING; - sevstr = ""; + sevstr = "Warning"; break; - case MCE_SEV_ERROR_SYNC: + case MCE_SEV_SEVERE: level = KERN_ERR; sevstr = "Severe"; break; @@ -384,101 +398,107 @@ void machine_check_print_event_info(struct machine_check_event *evt, break; } - printk("%s%s Machine check interrupt [%s]\n", level, sevstr, - evt->disposition == MCE_DISPOSITION_RECOVERED ? - "Recovered" : "Not recovered"); - - if (in_guest) { - printk("%s Guest NIP: %016llx\n", level, evt->srr0); - } else if (user_mode) { - printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, - evt->srr0, current->pid, current->comm); - } else { - printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, - (void *)evt->srr0); - } - - printk("%s Initiator: %s\n", level, - evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); switch (evt->error_type) { case MCE_ERROR_TYPE_UE: + err_type = "UE"; subtype = evt->u.ue_error.ue_error_type < ARRAY_SIZE(mc_ue_types) ? mc_ue_types[evt->u.ue_error.ue_error_type] : "Unknown"; - printk("%s Error type: UE [%s]\n", level, subtype); if (evt->u.ue_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.ue_error.effective_address); + ea = evt->u.ue_error.effective_address; if (evt->u.ue_error.physical_address_provided) - printk("%s Physical address: %016llx\n", - level, evt->u.ue_error.physical_address); + pa = evt->u.ue_error.physical_address; break; case MCE_ERROR_TYPE_SLB: + err_type = "SLB"; subtype = evt->u.slb_error.slb_error_type < ARRAY_SIZE(mc_slb_types) ? mc_slb_types[evt->u.slb_error.slb_error_type] : "Unknown"; - printk("%s Error type: SLB [%s]\n", level, subtype); if (evt->u.slb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.slb_error.effective_address); + ea = evt->u.slb_error.effective_address; break; case MCE_ERROR_TYPE_ERAT: + err_type = "ERAT"; subtype = evt->u.erat_error.erat_error_type < ARRAY_SIZE(mc_erat_types) ? mc_erat_types[evt->u.erat_error.erat_error_type] : "Unknown"; - printk("%s Error type: ERAT [%s]\n", level, subtype); if (evt->u.erat_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.erat_error.effective_address); + ea = evt->u.erat_error.effective_address; break; case MCE_ERROR_TYPE_TLB: + err_type = "TLB"; subtype = evt->u.tlb_error.tlb_error_type < ARRAY_SIZE(mc_tlb_types) ? mc_tlb_types[evt->u.tlb_error.tlb_error_type] : "Unknown"; - printk("%s Error type: TLB [%s]\n", level, subtype); if (evt->u.tlb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.tlb_error.effective_address); + ea = evt->u.tlb_error.effective_address; break; case MCE_ERROR_TYPE_USER: + err_type = "User"; subtype = evt->u.user_error.user_error_type < ARRAY_SIZE(mc_user_types) ? mc_user_types[evt->u.user_error.user_error_type] : "Unknown"; - printk("%s Error type: User [%s]\n", level, subtype); if (evt->u.user_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.user_error.effective_address); + ea = evt->u.user_error.effective_address; break; case MCE_ERROR_TYPE_RA: + err_type = "Real address"; subtype = evt->u.ra_error.ra_error_type < ARRAY_SIZE(mc_ra_types) ? mc_ra_types[evt->u.ra_error.ra_error_type] : "Unknown"; - printk("%s Error type: Real address [%s]\n", level, subtype); if (evt->u.ra_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.ra_error.effective_address); + ea = evt->u.ra_error.effective_address; break; case MCE_ERROR_TYPE_LINK: + err_type = "Link"; subtype = evt->u.link_error.link_error_type < ARRAY_SIZE(mc_link_types) ? mc_link_types[evt->u.link_error.link_error_type] : "Unknown"; - printk("%s Error type: Link [%s]\n", level, subtype); if (evt->u.link_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.link_error.effective_address); + ea = evt->u.link_error.effective_address; break; default: case MCE_ERROR_TYPE_UNKNOWN: - printk("%s Error type: Unknown\n", level); + err_type = "Unknown"; + subtype = ""; break; } + + dar_str[0] = pa_str[0] = '\0'; + if (ea && evt->srr0 != ea) { + /* Load/Store address */ + n = sprintf(dar_str, "DAR: %016llx ", ea); + if (pa) + sprintf(dar_str + n, "paddr: %016llx ", pa); + } else if (pa) { + sprintf(pa_str, " paddr: %016llx", pa); + } + + printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n", + level, evt->cpu, sevstr, in_guest ? "Guest" : "Host", + err_type, subtype, dar_str, + evt->disposition == MCE_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + + if (in_guest || user_mode) { + printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n", + level, evt->cpu, current->pid, current->comm, + in_guest ? "Guest " : "", evt->srr0, pa_str); + } else { + printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n", + level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str); + } + + subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ? + mc_error_class[evt->error_class] : "Unknown"; + printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype); } EXPORT_SYMBOL_GPL(machine_check_print_event_info); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 6b800eec31f2..b5e876efe864 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -36,7 +36,7 @@ * Convert an address related to an mm to a PFN. NOTE: we are in real * mode, we could potentially race with page table updates. */ -static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) +unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) { pte_t *ptep; unsigned long flags; @@ -131,213 +131,232 @@ struct mce_ierror_table { bool nip_valid; /* nip is a valid indicator of faulting address */ unsigned int error_type; unsigned int error_subtype; + unsigned int error_class; unsigned int initiator; unsigned int severity; + bool sync_error; }; static const struct mce_ierror_table mce_p7_ierror_table[] = { { 0x00000000001c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000100000, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000180000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; static const struct mce_ierror_table mce_p8_ierror_table[] = { { 0x00000000081c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000100000, true, - MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000180000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008000000, true, - MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008040000, true, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; static const struct mce_ierror_table mce_p9_ierror_table[] = { { 0x00000000081c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000100000, true, - MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000180000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008000000, true, - MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008040000, true, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000080c0000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008100000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008140000, false, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, - MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ { 0x00000000081c0000, 0x0000000008180000, false, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ -{ 0x00000000081c0000, 0x00000000081c0000, true, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; struct mce_derror_table { unsigned long dsisr_value; bool dar_valid; /* dar is a valid indicator of faulting address */ unsigned int error_type; unsigned int error_subtype; + unsigned int error_class; unsigned int initiator; unsigned int severity; + bool sync_error; }; static const struct mce_derror_table mce_p7_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000040, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static const struct mce_derror_table mce_p8_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00002000, true, - MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00001000, true, MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000200, true, MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static const struct mce_derror_table mce_p9_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00002000, true, - MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00001000, true, MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000200, false, - MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000040, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000020, false, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000010, false, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000008, false, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr, uint64_t *phys_addr) @@ -404,6 +423,7 @@ static int mce_handle_ierror(struct pt_regs *regs, /* now fill in mce_error_info */ mce_err->error_type = table[i].error_type; + mce_err->error_class = table[i].error_class; switch (table[i].error_type) { case MCE_ERROR_TYPE_UE: mce_err->u.ue_error_type = table[i].error_subtype; @@ -427,11 +447,12 @@ static int mce_handle_ierror(struct pt_regs *regs, mce_err->u.link_error_type = table[i].error_subtype; break; } + mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; if (table[i].nip_valid) { *addr = regs->nip; - if (mce_err->severity == MCE_SEV_ERROR_SYNC && + if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { unsigned long pfn; @@ -448,8 +469,10 @@ static int mce_handle_ierror(struct pt_regs *regs, } mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; - mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->error_class = MCE_ECLASS_UNKNOWN; + mce_err->severity = MCE_SEV_SEVERE; mce_err->initiator = MCE_INITIATOR_CPU; + mce_err->sync_error = true; return 0; } @@ -496,6 +519,7 @@ static int mce_handle_derror(struct pt_regs *regs, /* now fill in mce_error_info */ mce_err->error_type = table[i].error_type; + mce_err->error_class = table[i].error_class; switch (table[i].error_type) { case MCE_ERROR_TYPE_UE: mce_err->u.ue_error_type = table[i].error_subtype; @@ -519,11 +543,12 @@ static int mce_handle_derror(struct pt_regs *regs, mce_err->u.link_error_type = table[i].error_subtype; break; } + mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; if (table[i].dar_valid) *addr = regs->dar; - else if (mce_err->severity == MCE_SEV_ERROR_SYNC && + else if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { /* * We do a maximum of 4 nested MCE calls, see @@ -539,8 +564,10 @@ static int mce_handle_derror(struct pt_regs *regs, return handled; mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; - mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->error_class = MCE_ECLASS_UNKNOWN; + mce_err->severity = MCE_SEV_SEVERE; mce_err->initiator = MCE_INITIATOR_CPU; + mce_err->sync_error = true; return 0; } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index e7382abee868..9cc91d03ab62 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -267,12 +267,12 @@ void copy_mm_to_paca(struct mm_struct *mm) get_paca()->mm_ctx_id = context->id; #ifdef CONFIG_PPC_MM_SLICES - VM_BUG_ON(!mm->context.slb_addr_limit); - get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit; - memcpy(&get_paca()->mm_ctx_low_slices_psize, - &context->low_slices_psize, sizeof(context->low_slices_psize)); - memcpy(&get_paca()->mm_ctx_high_slices_psize, - &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); + VM_BUG_ON(!mm_ctx_slb_addr_limit(context)); + get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context); + memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context), + LOW_SLICE_ARRAY_SZ); + memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context), + TASK_SLICE_ARRAY_SZ(context)); #else /* CONFIG_PPC_MM_SLICES */ get_paca()->mm_ctx_user_psize = context->user_psize; get_paca()->mm_ctx_sllp = context->sllp; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dd9e0d5386ee..87da40129927 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -67,6 +67,7 @@ #include <asm/cpu_has_feature.h> #include <asm/asm-prototypes.h> #include <asm/stacktrace.h> +#include <asm/hw_breakpoint.h> #include <linux/kprobes.h> #include <linux/kdebug.h> @@ -133,7 +134,8 @@ static int __init enable_strict_msr_control(char *str) } early_param("ppc_strict_facility_enable", enable_strict_msr_control); -unsigned long msr_check_and_set(unsigned long bits) +/* notrace because it's called by restore_math */ +unsigned long notrace msr_check_and_set(unsigned long bits) { unsigned long oldmsr = mfmsr(); unsigned long newmsr; @@ -152,7 +154,8 @@ unsigned long msr_check_and_set(unsigned long bits) } EXPORT_SYMBOL_GPL(msr_check_and_set); -void __msr_check_and_clear(unsigned long bits) +/* notrace because it's called by restore_math */ +void notrace __msr_check_and_clear(unsigned long bits) { unsigned long oldmsr = mfmsr(); unsigned long newmsr; @@ -525,7 +528,17 @@ void giveup_all(struct task_struct *tsk) } EXPORT_SYMBOL(giveup_all); -void restore_math(struct pt_regs *regs) +/* + * The exception exit path calls restore_math() with interrupts hard disabled + * but the soft irq state not "reconciled". ftrace code that calls + * local_irq_save/restore causes warnings. + * + * Rather than complicate the exit path, just don't trace restore_math. This + * could be done by having ftrace entry code check for this un-reconciled + * condition where MSR[EE]=0 and PACA_IRQ_HARD_DIS is not set, and + * temporarily fix it up for the duration of the ftrace call. + */ +void notrace restore_math(struct pt_regs *regs) { unsigned long msr; @@ -784,7 +797,7 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) return __set_dabr(dabr, dabrx); } -static inline int set_dawr(struct arch_hw_breakpoint *brk) +int set_dawr(struct arch_hw_breakpoint *brk) { unsigned long dawr, dawrx, mrd; @@ -816,7 +829,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) { memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); - if (cpu_has_feature(CPU_FTR_DAWR)) + if (dawr_enabled()) // Power8 or later set_dawr(brk); else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) @@ -830,8 +843,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) /* Check if we have DAWR or DABR hardware */ bool ppc_breakpoint_available(void) { - if (cpu_has_feature(CPU_FTR_DAWR)) - return true; /* POWER8 DAWR */ + if (dawr_enabled()) + return true; /* POWER8 DAWR or POWER9 forced DAWR */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) return false; /* POWER9 with DAWR disabled */ /* DABR: Everything but POWER8 and POWER9 */ @@ -1151,11 +1164,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, thread_pkey_regs_restore(new_thread, old_thread); } -#ifdef CONFIG_PPC_BOOK3S_64 -#define CP_SIZE 128 -static const u8 dummy_copy_buffer[CP_SIZE] __attribute__((aligned(CP_SIZE))); -#endif - struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { @@ -1729,7 +1737,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ #ifdef CONFIG_PPC_BOOK3S_64 - preload_new_slb_context(start, sp); + if (!radix_enabled()) + preload_new_slb_context(start, sp); #endif #endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index f33ff4163a51..523bb99d7676 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -154,10 +154,8 @@ static struct prom_t __prombss prom; static unsigned long __prombss prom_entry; -#define PROM_SCRATCH_SIZE 256 - static char __prombss of_stdout_device[256]; -static char __prombss prom_scratch[PROM_SCRATCH_SIZE]; +static char __prombss prom_scratch[256]; static unsigned long __prombss dt_header_start; static unsigned long __prombss dt_struct_start, dt_struct_end; @@ -224,6 +222,135 @@ static bool __prombss rtas_has_query_cpu_stopped; #define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR) #define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR) +/* Copied from lib/string.c and lib/kstrtox.c */ + +static int __init prom_strcmp(const char *cs, const char *ct) +{ + unsigned char c1, c2; + + while (1) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + } + return 0; +} + +static char __init *prom_strcpy(char *dest, const char *src) +{ + char *tmp = dest; + + while ((*dest++ = *src++) != '\0') + /* nothing */; + return tmp; +} + +static int __init prom_strncmp(const char *cs, const char *ct, size_t count) +{ + unsigned char c1, c2; + + while (count) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + count--; + } + return 0; +} + +static size_t __init prom_strlen(const char *s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +static int __init prom_memcmp(const void *cs, const void *ct, size_t count) +{ + const unsigned char *su1, *su2; + int res = 0; + + for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) + if ((res = *su1 - *su2) != 0) + break; + return res; +} + +static char __init *prom_strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = prom_strlen(s2); + if (!l2) + return (char *)s1; + l1 = prom_strlen(s1); + while (l1 >= l2) { + l1--; + if (!prom_memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} + +static size_t __init prom_strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = prom_strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + +#ifdef CONFIG_PPC_PSERIES +static int __init prom_strtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + case 'o': + case 'O': + switch (s[1]) { + case 'n': + case 'N': + *res = true; + return 0; + case 'f': + case 'F': + *res = false; + return 0; + default: + break; + } + default: + break; + } + + return -EINVAL; +} +#endif /* This is the one and *ONLY* place where we actually call open * firmware. @@ -555,7 +682,7 @@ static int __init prom_setprop(phandle node, const char *nodename, add_string(&p, tohex((u32)(unsigned long) value)); add_string(&p, tohex(valuelen)); add_string(&p, tohex(ADDR(pname))); - add_string(&p, tohex(strlen(pname))); + add_string(&p, tohex(prom_strlen(pname))); add_string(&p, "property"); *p = 0; return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd); @@ -631,33 +758,30 @@ static void __init early_cmdline_parse(void) const char *opt; char *p; - int l __maybe_unused = 0; + int l = 0; prom_cmd_line[0] = 0; p = prom_cmd_line; if ((long)prom.chosen > 0) l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1); -#ifdef CONFIG_CMDLINE - if (l <= 0 || p[0] == '\0') /* dbl check */ - strlcpy(prom_cmd_line, - CONFIG_CMDLINE, sizeof(prom_cmd_line)); -#endif /* CONFIG_CMDLINE */ + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */ + prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line)); prom_printf("command line: %s\n", prom_cmd_line); #ifdef CONFIG_PPC64 - opt = strstr(prom_cmd_line, "iommu="); + opt = prom_strstr(prom_cmd_line, "iommu="); if (opt) { prom_printf("iommu opt is: %s\n", opt); opt += 6; while (*opt && *opt == ' ') opt++; - if (!strncmp(opt, "off", 3)) + if (!prom_strncmp(opt, "off", 3)) prom_iommu_off = 1; - else if (!strncmp(opt, "force", 5)) + else if (!prom_strncmp(opt, "force", 5)) prom_iommu_force_on = 1; } #endif - opt = strstr(prom_cmd_line, "mem="); + opt = prom_strstr(prom_cmd_line, "mem="); if (opt) { opt += 4; prom_memory_limit = prom_memparse(opt, (const char **)&opt); @@ -669,13 +793,13 @@ static void __init early_cmdline_parse(void) #ifdef CONFIG_PPC_PSERIES prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); - opt = strstr(prom_cmd_line, "disable_radix"); + opt = prom_strstr(prom_cmd_line, "disable_radix"); if (opt) { opt += 13; if (*opt && *opt == '=') { bool val; - if (kstrtobool(++opt, &val)) + if (prom_strtobool(++opt, &val)) prom_radix_disable = false; else prom_radix_disable = val; @@ -1028,7 +1152,7 @@ static int __init prom_count_smt_threads(void) type[0] = 0; prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "cpu")) + if (prom_strcmp(type, "cpu")) continue; /* * There is an entry for each smt thread, each entry being @@ -1138,8 +1262,14 @@ static void __init prom_check_platform_support(void) int prop_len = prom_getproplen(prom.chosen, "ibm,arch-vec-5-platform-support"); - /* First copy the architecture vec template */ - ibm_architecture_vec = ibm_architecture_vec_template; + /* + * First copy the architecture vec template + * + * use memcpy() instead of *vec = *vec_template so that GCC replaces it + * by __memcpy() when KASAN is active + */ + memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template, + sizeof(ibm_architecture_vec)); if (prop_len > 1) { int i; @@ -1475,7 +1605,7 @@ static void __init prom_init_mem(void) */ prom_getprop(node, "name", type, sizeof(type)); } - if (strcmp(type, "memory")) + if (prom_strcmp(type, "memory")) continue; plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf)); @@ -1487,8 +1617,8 @@ static void __init prom_init_mem(void) endp = p + (plen / sizeof(cell_t)); #ifdef DEBUG_PROM - memset(path, 0, PROM_SCRATCH_SIZE); - call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + memset(path, 0, sizeof(prom_scratch)); + call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1); prom_debug(" node %s :\n", path); #endif /* DEBUG_PROM */ @@ -1756,19 +1886,19 @@ static void __init prom_initialize_tce_table(void) prom_getprop(node, "device_type", type, sizeof(type)); prom_getprop(node, "model", model, sizeof(model)); - if ((type[0] == 0) || (strstr(type, "pci") == NULL)) + if ((type[0] == 0) || (prom_strstr(type, "pci") == NULL)) continue; /* Keep the old logic intact to avoid regression. */ if (compatible[0] != 0) { - if ((strstr(compatible, "python") == NULL) && - (strstr(compatible, "Speedwagon") == NULL) && - (strstr(compatible, "Winnipeg") == NULL)) + if ((prom_strstr(compatible, "python") == NULL) && + (prom_strstr(compatible, "Speedwagon") == NULL) && + (prom_strstr(compatible, "Winnipeg") == NULL)) continue; } else if (model[0] != 0) { - if ((strstr(model, "ython") == NULL) && - (strstr(model, "peedwagon") == NULL) && - (strstr(model, "innipeg") == NULL)) + if ((prom_strstr(model, "ython") == NULL) && + (prom_strstr(model, "peedwagon") == NULL) && + (prom_strstr(model, "innipeg") == NULL)) continue; } @@ -1796,10 +1926,10 @@ static void __init prom_initialize_tce_table(void) local_alloc_bottom = base; /* It seems OF doesn't null-terminate the path :-( */ - memset(path, 0, PROM_SCRATCH_SIZE); + memset(path, 0, sizeof(prom_scratch)); /* Call OF to setup the TCE hardware */ if (call_prom("package-to-path", 3, 1, node, - path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) { + path, sizeof(prom_scratch) - 1) == PROM_ERROR) { prom_printf("package-to-path failed\n"); } @@ -1917,12 +2047,12 @@ static void __init prom_hold_cpus(void) type[0] = 0; prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "cpu") != 0) + if (prom_strcmp(type, "cpu") != 0) continue; /* Skip non-configured cpus. */ if (prom_getprop(node, "status", type, sizeof(type)) > 0) - if (strcmp(type, "okay") != 0) + if (prom_strcmp(type, "okay") != 0) continue; reg = cpu_to_be32(-1); /* make sparse happy */ @@ -1998,9 +2128,9 @@ static void __init prom_find_mmu(void) return; version[sizeof(version) - 1] = 0; /* XXX might need to add other versions here */ - if (strcmp(version, "Open Firmware, 1.0.5") == 0) + if (prom_strcmp(version, "Open Firmware, 1.0.5") == 0) of_workarounds = OF_WA_CLAIM; - else if (strncmp(version, "FirmWorks,3.", 12) == 0) { + else if (prom_strncmp(version, "FirmWorks,3.", 12) == 0) { of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL; call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim"); } else @@ -2033,7 +2163,7 @@ static void __init prom_init_stdout(void) call_prom("instance-to-path", 3, 1, prom.stdout, path, 255); prom_printf("OF stdout device is: %s\n", of_stdout_device); prom_setprop(prom.chosen, "/chosen", "linux,stdout-path", - path, strlen(path) + 1); + path, prom_strlen(path) + 1); /* instance-to-package fails on PA-Semi */ stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout); @@ -2043,7 +2173,7 @@ static void __init prom_init_stdout(void) /* If it's a display, note it */ memset(type, 0, sizeof(type)); prom_getprop(stdout_node, "device_type", type, sizeof(type)); - if (strcmp(type, "display") == 0) + if (prom_strcmp(type, "display") == 0) prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0); } } @@ -2064,19 +2194,19 @@ static int __init prom_find_machine_type(void) compat[len] = 0; while (i < len) { char *p = &compat[i]; - int sl = strlen(p); + int sl = prom_strlen(p); if (sl == 0) break; - if (strstr(p, "Power Macintosh") || - strstr(p, "MacRISC")) + if (prom_strstr(p, "Power Macintosh") || + prom_strstr(p, "MacRISC")) return PLATFORM_POWERMAC; #ifdef CONFIG_PPC64 /* We must make sure we don't detect the IBM Cell * blades as pSeries due to some firmware issues, * so we do it here. */ - if (strstr(p, "IBM,CBEA") || - strstr(p, "IBM,CPBW-1.0")) + if (prom_strstr(p, "IBM,CBEA") || + prom_strstr(p, "IBM,CPBW-1.0")) return PLATFORM_GENERIC; #endif /* CONFIG_PPC64 */ i += sl + 1; @@ -2093,7 +2223,7 @@ static int __init prom_find_machine_type(void) compat, sizeof(compat)-1); if (len <= 0) return PLATFORM_GENERIC; - if (strcmp(compat, "chrp")) + if (prom_strcmp(compat, "chrp")) return PLATFORM_GENERIC; /* Default to pSeries. We need to know if we are running LPAR */ @@ -2155,19 +2285,19 @@ static void __init prom_check_displays(void) for (node = 0; prom_next_node(&node); ) { memset(type, 0, sizeof(type)); prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "display") != 0) + if (prom_strcmp(type, "display") != 0) continue; /* It seems OF doesn't null-terminate the path :-( */ path = prom_scratch; - memset(path, 0, PROM_SCRATCH_SIZE); + memset(path, 0, sizeof(prom_scratch)); /* * leave some room at the end of the path for appending extra * arguments */ if (call_prom("package-to-path", 3, 1, node, path, - PROM_SCRATCH_SIZE-10) == PROM_ERROR) + sizeof(prom_scratch) - 10) == PROM_ERROR) continue; prom_printf("found display : %s, opening... ", path); @@ -2259,9 +2389,9 @@ static unsigned long __init dt_find_string(char *str) s = os = (char *)dt_string_start; s += 4; while (s < (char *)dt_string_end) { - if (strcmp(s, str) == 0) + if (prom_strcmp(s, str) == 0) return s - os; - s += strlen(s) + 1; + s += prom_strlen(s) + 1; } return 0; } @@ -2294,7 +2424,7 @@ static void __init scan_dt_build_strings(phandle node, } /* skip "name" */ - if (strcmp(namep, "name") == 0) { + if (prom_strcmp(namep, "name") == 0) { *mem_start = (unsigned long)namep; prev_name = "name"; continue; @@ -2306,7 +2436,7 @@ static void __init scan_dt_build_strings(phandle node, namep = sstart + soff; } else { /* Trim off some if we can */ - *mem_start = (unsigned long)namep + strlen(namep) + 1; + *mem_start = (unsigned long)namep + prom_strlen(namep) + 1; dt_string_end = *mem_start; } prev_name = namep; @@ -2363,8 +2493,8 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, /* get it again for debugging */ path = prom_scratch; - memset(path, 0, PROM_SCRATCH_SIZE); - call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + memset(path, 0, sizeof(prom_scratch)); + call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1); /* get and store all properties */ prev_name = ""; @@ -2375,7 +2505,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, break; /* skip "name" */ - if (strcmp(pname, "name") == 0) { + if (prom_strcmp(pname, "name") == 0) { prev_name = "name"; continue; } @@ -2406,7 +2536,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, call_prom("getprop", 4, 1, node, pname, valp, l); *mem_start = _ALIGN(*mem_start, 4); - if (!strcmp(pname, "phandle")) + if (!prom_strcmp(pname, "phandle")) has_phandle = 1; } @@ -2476,8 +2606,8 @@ static void __init flatten_device_tree(void) /* Add "phandle" in there, we'll need it */ namep = make_room(&mem_start, &mem_end, 16, 1); - strcpy(namep, "phandle"); - mem_start = (unsigned long)namep + strlen(namep) + 1; + prom_strcpy(namep, "phandle"); + mem_start = (unsigned long)namep + prom_strlen(namep) + 1; /* Build string array */ prom_printf("Building dt strings...\n"); @@ -2799,7 +2929,7 @@ static void __init fixup_device_tree_efika(void) rv = prom_getprop(node, "model", prop, sizeof(prop)); if (rv == PROM_ERROR) return; - if (strcmp(prop, "EFIKA5K2")) + if (prom_strcmp(prop, "EFIKA5K2")) return; prom_printf("Applying EFIKA device tree fixups\n"); @@ -2807,13 +2937,13 @@ static void __init fixup_device_tree_efika(void) /* Claiming to be 'chrp' is death */ node = call_prom("finddevice", 1, 1, ADDR("/")); rv = prom_getprop(node, "device_type", prop, sizeof(prop)); - if (rv != PROM_ERROR && (strcmp(prop, "chrp") == 0)) + if (rv != PROM_ERROR && (prom_strcmp(prop, "chrp") == 0)) prom_setprop(node, "/", "device_type", "efika", sizeof("efika")); /* CODEGEN,description is exposed in /proc/cpuinfo so fix that too */ rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop)); - if (rv != PROM_ERROR && (strstr(prop, "CHRP"))) + if (rv != PROM_ERROR && (prom_strstr(prop, "CHRP"))) prom_setprop(node, "/", "CODEGEN,description", "Efika 5200B PowerPC System", sizeof("Efika 5200B PowerPC System")); diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 667df97d2595..4cac45cb5de5 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -16,10 +16,18 @@ # If you really need to reference something from prom_init.o add # it to the list below: +grep "^CONFIG_KASAN=y$" .config >/dev/null +if [ $? -eq 0 ] +then + MEM_FUNCS="__memcpy __memset" +else + MEM_FUNCS="memcpy memset" +fi + WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush -_end enter_prom memcpy memset reloc_offset __secondary_hold +_end enter_prom $MEM_FUNCS reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start -strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224 +logo_linux_clut224 reloc_got2 kernstart_addr memstart_addr linux_banner _stext __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index d9ac7d94656e..684b0b315c32 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -43,6 +43,7 @@ #include <asm/tm.h> #include <asm/asm-prototypes.h> #include <asm/debug.h> +#include <asm/hw_breakpoint.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -3088,7 +3089,7 @@ long arch_ptrace(struct task_struct *child, long request, dbginfo.sizeof_condition = 0; #ifdef CONFIG_HAVE_HW_BREAKPOINT dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; - if (cpu_has_feature(CPU_FTR_DAWR)) + if (dawr_enabled()) dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; #else dbginfo.features = 0; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 70568ccbd9fd..e1c9cf079503 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -104,6 +104,14 @@ static __init int barrier_nospec_debugfs_init(void) return 0; } device_initcall(barrier_nospec_debugfs_init); + +static __init int security_feature_debugfs_init(void) +{ + debugfs_create_x64("security_features", 0400, powerpc_debugfs_root, + (u64 *)&powerpc_security_features); + return 0; +} +device_initcall(security_feature_debugfs_init); #endif /* CONFIG_DEBUG_FS */ #ifdef CONFIG_PPC_FSL_BOOK3E diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2e5dfb6e0823..aad9f5df6ab6 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -67,6 +67,7 @@ #include <asm/livepatch.h> #include <asm/mmu_context.h> #include <asm/cpu_has_feature.h> +#include <asm/kasan.h> #include "setup.h" @@ -133,13 +134,11 @@ int crashing_cpu = -1; /* also used by kexec */ void machine_shutdown(void) { -#ifdef CONFIG_FA_DUMP /* * if fadump is active, cleanup the fadump registration before we * shutdown. */ fadump_cleanup(); -#endif if (ppc_md.machine_shutdown) ppc_md.machine_shutdown(); @@ -200,14 +199,15 @@ static void show_cpuinfo_summary(struct seq_file *m) { struct device_node *root; const char *model = NULL; -#if defined(CONFIG_SMP) && defined(CONFIG_PPC32) unsigned long bogosum = 0; int i; - for_each_online_cpu(i) - bogosum += loops_per_jiffy; - seq_printf(m, "total bogomips\t: %lu.%02lu\n", - bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); -#endif /* CONFIG_SMP && CONFIG_PPC32 */ + + if (IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_PPC32)) { + for_each_online_cpu(i) + bogosum += loops_per_jiffy; + seq_printf(m, "total bogomips\t: %lu.%02lu\n", + bogosum / (500000 / HZ), bogosum / (5000 / HZ) % 100); + } seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); if (ppc_md.name) seq_printf(m, "platform\t: %s\n", ppc_md.name); @@ -221,11 +221,10 @@ static void show_cpuinfo_summary(struct seq_file *m) if (ppc_md.show_cpuinfo != NULL) ppc_md.show_cpuinfo(m); -#ifdef CONFIG_PPC32 /* Display the amount of memory */ - seq_printf(m, "Memory\t\t: %d MB\n", - (unsigned int)(total_memory / (1024 * 1024))); -#endif + if (IS_ENABLED(CONFIG_PPC32)) + seq_printf(m, "Memory\t\t: %d MB\n", + (unsigned int)(total_memory / (1024 * 1024))); } static int show_cpuinfo(struct seq_file *m, void *v) @@ -252,26 +251,24 @@ static int show_cpuinfo(struct seq_file *m, void *v) else seq_printf(m, "unknown (%08x)", pvr); -#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC)) seq_printf(m, ", altivec supported"); -#endif /* CONFIG_ALTIVEC */ seq_printf(m, "\n"); #ifdef CONFIG_TAU - if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) { -#ifdef CONFIG_TAU_AVERAGE - /* more straightforward, but potentially misleading */ - seq_printf(m, "temperature \t: %u C (uncalibrated)\n", - cpu_temp(cpu_id)); -#else - /* show the actual temp sensor range */ - u32 temp; - temp = cpu_temp_both(cpu_id); - seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", - temp & 0xff, temp >> 16); -#endif + if (cpu_has_feature(CPU_FTR_TAU)) { + if (IS_ENABLED(CONFIG_TAU_AVERAGE)) { + /* more straightforward, but potentially misleading */ + seq_printf(m, "temperature \t: %u C (uncalibrated)\n", + cpu_temp(cpu_id)); + } else { + /* show the actual temp sensor range */ + u32 temp; + temp = cpu_temp_both(cpu_id); + seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", + temp & 0xff, temp >> 16); + } } #endif /* CONFIG_TAU */ @@ -335,11 +332,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", maj, min, PVR_VER(pvr), PVR_REV(pvr)); -#ifdef CONFIG_PPC32 - seq_printf(m, "bogomips\t: %lu.%02lu\n", - loops_per_jiffy / (500000/HZ), - (loops_per_jiffy / (5000/HZ)) % 100); -#endif + if (IS_ENABLED(CONFIG_PPC32)) + seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ), + (loops_per_jiffy / (5000 / HZ)) % 100); + seq_printf(m, "\n"); /* If this is the last cpu, print the summary */ @@ -401,8 +397,8 @@ void __init check_for_initrd(void) #ifdef CONFIG_SMP -int threads_per_core, threads_per_subcore, threads_shift; -cpumask_t threads_core_mask; +int threads_per_core, threads_per_subcore, threads_shift __read_mostly; +cpumask_t threads_core_mask __read_mostly; EXPORT_SYMBOL_GPL(threads_per_core); EXPORT_SYMBOL_GPL(threads_per_subcore); EXPORT_SYMBOL_GPL(threads_shift); @@ -740,23 +736,19 @@ void __init setup_panic(void) * BUG() in that case. */ -#ifdef CONFIG_NOT_COHERENT_CACHE -#define KERNEL_COHERENCY 0 -#else -#define KERNEL_COHERENCY 1 -#endif +#define KERNEL_COHERENCY (!IS_ENABLED(CONFIG_NOT_COHERENT_CACHE)) static int __init check_cache_coherency(void) { struct device_node *np; const void *prop; - int devtree_coherency; + bool devtree_coherency; np = of_find_node_by_path("/"); prop = of_get_property(np, "coherency-off", NULL); of_node_put(np); - devtree_coherency = prop ? 0 : 1; + devtree_coherency = prop ? false : true; if (devtree_coherency != KERNEL_COHERENCY) { printk(KERN_ERR @@ -799,12 +791,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) static __init void print_system_info(void) { pr_info("-----------------------------------------------------\n"); -#ifdef CONFIG_PPC_BOOK3S_64 - pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); -#endif -#ifdef CONFIG_PPC_BOOK3S_32 - pr_info("Hash_size = 0x%lx\n", Hash_size); -#endif pr_info("phys_mem_size = 0x%llx\n", (unsigned long long)memblock_phys_mem_size()); @@ -826,18 +812,7 @@ static __init void print_system_info(void) pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - if (htab_address) - pr_info("htab_address = 0x%p\n", htab_address); - if (htab_hash_mask) - pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif -#ifdef CONFIG_PPC_BOOK3S_32 - if (Hash) - pr_info("Hash = 0x%p\n", Hash); - if (Hash_mask) - pr_info("Hash_mask = 0x%lx\n", Hash_mask); -#endif + print_system_hash_info(); if (PHYSICAL_START > 0) pr_info("physical_start = 0x%llx\n", @@ -868,6 +843,8 @@ static void smp_setup_pacas(void) */ void __init setup_arch(char **cmdline_p) { + kasan_init(); + *cmdline_p = boot_command_line; /* Set a half-reasonable default so udelay does something sensible */ @@ -947,20 +924,7 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; -#ifdef CONFIG_PPC_MM_SLICES -#ifdef CONFIG_PPC64 - if (!radix_enabled()) - init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; -#elif defined(CONFIG_PPC_8xx) - init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW; -#else -#error "context.addr_limit not initialized." -#endif -#endif - -#ifdef CONFIG_SPAPR_TCE_IOMMU mm_iommu_init(&init_mm); -#endif irqstack_early_init(); exc_lvl_early_init(); emergency_stack_init(); @@ -969,9 +933,9 @@ void __init setup_arch(char **cmdline_p) early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif + if (IS_ENABLED(CONFIG_DUMMY_CONSOLE)) + conswitchp = &dummy_con; + if (ppc_md.setup_arch) ppc_md.setup_arch(); @@ -983,10 +947,8 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff. */ mmu_context_init(); -#ifdef CONFIG_PPC64 /* Interrupt code needs to be 64K-aligned. */ - if ((unsigned long)_stext & 0xffff) + if (IS_ENABLED(CONFIG_PPC64) && (unsigned long)_stext & 0xffff) panic("Kernelbase not 64K-aligned (0x%lx)!\n", (unsigned long)_stext); -#endif } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 4a65e08a6042..3fb9f64f88fd 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -64,34 +64,6 @@ EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); /* - * We're called here very early in the boot. - * - * Note that the kernel may be running at an address which is different - * from the address that it was linked at, so we must use RELOC/PTRRELOC - * to access static data (including strings). -- paulus - */ -notrace unsigned long __init early_init(unsigned long dt_ptr) -{ - unsigned long offset = reloc_offset(); - - /* First zero the BSS -- use memset_io, some platforms don't have - * caches on yet */ - memset_io((void __iomem *)PTRRELOC(&__bss_start), 0, - __bss_stop - __bss_start); - - /* - * Identify the CPU type and fix up code sections - * that depend on which cpu we have. - */ - identify_cpu(offset, mfspr(SPRN_PVR)); - - apply_feature_fixups(); - - return KERNELBASE + offset; -} - - -/* * This is run before start_kernel(), the kernel has been relocated * and we are running with enough of the MMU enabled to have our * proper kernel virtual addresses diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4f49e1a3594c..a400854a5036 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -68,6 +68,7 @@ #include <asm/cputhreads.h> #include <asm/hw_irq.h> #include <asm/feature-fixups.h> +#include <asm/kup.h> #include "setup.h" @@ -331,6 +332,12 @@ void __init early_setup(unsigned long dt_ptr) */ configure_exceptions(); + /* + * Configure Kernel Userspace Protection. This needs to happen before + * feature fixups for platforms that implement this using features. + */ + setup_kup(); + /* Apply all the dynamic patching */ apply_feature_fixups(); setup_feature_keys(); @@ -383,6 +390,9 @@ void early_setup_secondary(void) /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); + /* Perform any KUP setup that is per-cpu */ + setup_kup(); + /* * At this point, we can let interrupts switch to virtual mode * (the MMU has been setup), so adjust the MSR in the PACA to diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 6794466f6420..06c299ef6132 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, preempt_disable(); /* pull in MSR TS bits from user context */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + regs->msr |= msr & MSR_TS_MASK; /* * Ensure that TM is enabled in regs->msr before we leave the signal @@ -745,6 +745,31 @@ SYSCALL_DEFINE0(rt_sigreturn) if (MSR_TM_SUSPENDED(mfmsr())) tm_reclaim_current(0); + /* + * Disable MSR[TS] bit also, so, if there is an exception in the + * code below (as a page fault in copy_ckvsx_to_user()), it does + * not recheckpoint this task if there was a context switch inside + * the exception. + * + * A major page fault can indirectly call schedule(). A reschedule + * process in the middle of an exception can have a side effect + * (Changing the CPU MSR[TS] state), since schedule() is called + * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended + * (switch_to() calls tm_recheckpoint() for the 'new' process). In + * this case, the process continues to be the same in the CPU, but + * the CPU state just changed. + * + * This can cause a TM Bad Thing, since the MSR in the stack will + * have the MSR[TS]=0, and this is what will be used to RFID. + * + * Clearing MSR[TS] state here will avoid a recheckpoint if there + * is any process reschedule in kernel space. The MSR[TS] state + * does not need to be saved also, since it will be replaced with + * the MSR[TS] that came from user context later, at + * restore_tm_sigcontexts. + */ + regs->msr &= ~MSR_TS_MASK; + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; if (MSR_TM_ACTIVE(msr)) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc0503ef9c9c..325d60633dfa 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -43,7 +43,6 @@ #include <linux/timex.h> #include <linux/kernel_stat.h> #include <linux/time.h> -#include <linux/clockchips.h> #include <linux/init.h> #include <linux/profile.h> #include <linux/cpu.h> @@ -151,6 +150,8 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; EXPORT_SYMBOL_GPL(ppc_tb_freq); +bool tb_invalid; + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Factor for converting from cputime_t (timebase ticks) to @@ -460,6 +461,13 @@ void __delay(unsigned long loops) diff += 1000000000; spin_cpu_relax(); } while (diff < loops); + } else if (tb_invalid) { + /* + * TB is in error state and isn't ticking anymore. + * HMI handler was unable to recover from TB error. + * Return immediately, so that kernel won't get stuck here. + */ + spin_cpu_relax(); } else { start = get_tbl(); while (get_tbl() - start < loops) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1fd45a8650e1..665f294725cb 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2088,6 +2088,10 @@ void SPEFloatingPointException(struct pt_regs *regs) int code = FPE_FLTUNK; int err; + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + flush_spe_to_thread(current); spefscr = current->thread.spefscr; @@ -2133,6 +2137,10 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) extern int speround_handler(struct pt_regs *regs); int err; + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + preempt_disable(); if (regs->msr & MSR_SPE) giveup_spe(current); diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index ce199f6e4256..06f54d947057 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -26,9 +26,8 @@ GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n -ccflags-y := -shared -fno-common -fno-builtin -ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=both) +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ + -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both asflags-y := -D__VDSO32__ -s obj-y += vdso32_wrapper.o diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 28e7d112aa2f..32ebb3522ea1 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -12,9 +12,8 @@ GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n -ccflags-y := -shared -fno-common -fno-builtin -ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=both) +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ + -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both asflags-y := -D__VDSO64__ -s obj-y += vdso64_wrapper.o diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 21165da0052d..8eb867dbad5f 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -21,6 +21,7 @@ _GLOBAL(load_vr_state) REST_32VRS(0,r4,r3) blr EXPORT_SYMBOL(load_vr_state) +_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */ /* * Store VMX state into memory, including VSCR. diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3c6ab22a0c4e..af3c15a1d41e 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -77,7 +77,7 @@ static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */ -static DEFINE_PER_CPU(struct timer_list, wd_timer); +static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer); static DEFINE_PER_CPU(u64, wd_timer_tb); /* SMP checker bits */ @@ -293,21 +293,21 @@ out: nmi_exit(); } -static void wd_timer_reset(unsigned int cpu, struct timer_list *t) -{ - t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms); - if (wd_timer_period_ms > 1000) - t->expires = __round_jiffies_up(t->expires, cpu); - add_timer_on(t, cpu); -} - -static void wd_timer_fn(struct timer_list *t) +static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return HRTIMER_NORESTART; + + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return HRTIMER_NORESTART; + watchdog_timer_interrupt(cpu); - wd_timer_reset(cpu, t); + hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms)); + + return HRTIMER_RESTART; } void arch_touch_nmi_watchdog(void) @@ -323,37 +323,22 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); -static void start_watchdog_timer_on(unsigned int cpu) -{ - struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); - - per_cpu(wd_timer_tb, cpu) = get_tb(); - - timer_setup(t, wd_timer_fn, TIMER_PINNED); - wd_timer_reset(cpu, t); -} - -static void stop_watchdog_timer_on(unsigned int cpu) -{ - struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); - - del_timer_sync(t); -} - -static int start_wd_on_cpu(unsigned int cpu) +static void start_watchdog(void *arg) { + struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer); + int cpu = smp_processor_id(); unsigned long flags; if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { WARN_ON(1); - return 0; + return; } if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - return 0; + return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) - return 0; + return; wd_smp_lock(&flags); cpumask_set_cpu(cpu, &wd_cpus_enabled); @@ -363,27 +348,40 @@ static int start_wd_on_cpu(unsigned int cpu) } wd_smp_unlock(&flags); - start_watchdog_timer_on(cpu); + *this_cpu_ptr(&wd_timer_tb) = get_tb(); - return 0; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = watchdog_timer_fn; + hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms), + HRTIMER_MODE_REL_PINNED); } -static int stop_wd_on_cpu(unsigned int cpu) +static int start_watchdog_on_cpu(unsigned int cpu) { + return smp_call_function_single(cpu, start_watchdog, NULL, true); +} + +static void stop_watchdog(void *arg) +{ + struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer); + int cpu = smp_processor_id(); unsigned long flags; if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) - return 0; /* Can happen in CPU unplug case */ + return; /* Can happen in CPU unplug case */ - stop_watchdog_timer_on(cpu); + hrtimer_cancel(hrtimer); wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_cpus_enabled); wd_smp_unlock(&flags); wd_smp_clear_cpu_pending(cpu, get_tb()); +} - return 0; +static int stop_watchdog_on_cpu(unsigned int cpu) +{ + return smp_call_function_single(cpu, stop_watchdog, NULL, true); } static void watchdog_calc_timeouts(void) @@ -402,7 +400,7 @@ void watchdog_nmi_stop(void) int cpu; for_each_cpu(cpu, &wd_cpus_enabled) - stop_wd_on_cpu(cpu); + stop_watchdog_on_cpu(cpu); } void watchdog_nmi_start(void) @@ -411,7 +409,7 @@ void watchdog_nmi_start(void) watchdog_calc_timeouts(); for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) - start_wd_on_cpu(cpu); + start_watchdog_on_cpu(cpu); } /* @@ -423,7 +421,8 @@ int __init watchdog_nmi_probe(void) err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", - start_wd_on_cpu, stop_wd_on_cpu); + start_watchdog_on_cpu, + stop_watchdog_on_cpu); if (err < 0) { pr_warn("could not be initialized"); return err; |