diff options
Diffstat (limited to 'arch/powerpc/kernel')
64 files changed, 1987 insertions, 1305 deletions
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 92045ed64976..1f1ce8b86d5b 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -24,6 +24,7 @@ #include <asm/disassemble.h> #include <asm/cpu_has_feature.h> #include <asm/sstep.h> +#include <asm/inst.h> struct aligninfo { unsigned char len; @@ -104,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = { * so we don't need the address swizzling. */ static int emulate_spe(struct pt_regs *regs, unsigned int reg, - unsigned int instr) + struct ppc_inst ppc_instr) { int ret; union { @@ -115,8 +116,9 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, } data, temp; unsigned char __user *p, *addr; unsigned long *evr = ¤t->thread.evr[reg]; - unsigned int nb, flags; + unsigned int nb, flags, instr; + instr = ppc_inst_val(ppc_instr); instr = (instr >> 1) & 0x1f; /* DAR has the operand effective address */ @@ -293,7 +295,7 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, int fix_alignment(struct pt_regs *regs) { - unsigned int instr; + struct ppc_inst instr; struct instruction_op op; int r, type; @@ -303,18 +305,18 @@ int fix_alignment(struct pt_regs *regs) */ CHECK_FULL_REGS(regs); - if (unlikely(__get_user(instr, (unsigned int __user *)regs->nip))) + if (unlikely(__get_user_instr(instr, (void __user *)regs->nip))) return -EFAULT; if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) { /* We don't handle PPC little-endian any more... */ if (cpu_has_feature(CPU_FTR_PPC_LE)) return -EIO; - instr = swab32(instr); + instr = ppc_inst_swab(instr); } #ifdef CONFIG_SPE - if ((instr >> 26) == 0x4) { - int reg = (instr >> 21) & 0x1f; + if (ppc_inst_primary_opcode(instr) == 0x4) { + int reg = (ppc_inst_val(instr) >> 21) & 0x1f; PPC_WARN_ALIGNMENT(spe, regs); return emulate_spe(regs, reg, instr); } @@ -331,7 +333,7 @@ int fix_alignment(struct pt_regs *regs) * when pasting to a co-processor. Furthermore, paste_last is the * synchronisation point for preceding copy/paste sequences. */ - if ((instr & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe)) + if ((ppc_inst_val(instr) & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe)) return -EIO; r = analyse_instr(&op, regs, instr); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index fcf24a365fc0..9b9cde07e396 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -70,6 +70,10 @@ #include <asm/fixmap.h> #endif +#ifdef CONFIG_XMON +#include "../xmon/xmon_bpts.h" +#endif + #define STACK_PT_REGS_OFFSET(sym, val) \ DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val)) @@ -795,5 +799,9 @@ int main(void) DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE)); #endif +#ifdef CONFIG_XMON + DEFINE(BPT_SIZE, BPT_SIZE); +#endif + return 0; } diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f6517f67265a..f8b5ff64b604 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -288,6 +288,7 @@ _GLOBAL(__init_fpu_registers) mtmsr r10 isync blr +_ASM_NOKPROBE_SYMBOL(__init_fpu_registers) /* Definitions for the table use to save CPU states */ @@ -483,4 +484,5 @@ _GLOBAL(__restore_cpu_setup) 1: mtcr r7 blr +_ASM_NOKPROBE_SYMBOL(__restore_cpu_setup) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index a460298c7ddb..efdcfa714106 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -91,10 +91,15 @@ _GLOBAL(__restore_cpu_power8) mtlr r11 blr +_GLOBAL(__setup_cpu_power10) + mflr r11 + bl __init_FSCR_power10 + b 1f + _GLOBAL(__setup_cpu_power9) mflr r11 bl __init_FSCR - bl __init_PMU +1: bl __init_PMU bl __init_hvmode_206 mtlr r11 beqlr @@ -116,10 +121,15 @@ _GLOBAL(__setup_cpu_power9) mtlr r11 blr +_GLOBAL(__restore_cpu_power10) + mflr r11 + bl __init_FSCR_power10 + b 1f + _GLOBAL(__restore_cpu_power9) mflr r11 bl __init_FSCR - bl __init_PMU +1: bl __init_PMU mfmsr r3 rldicl. r0,r3,4,63 mtlr r11 @@ -182,9 +192,15 @@ __init_LPCR_ISA300: isync blr +__init_FSCR_power10: + mfspr r3, SPRN_FSCR + ori r3, r3, FSCR_PREFIX + mtspr SPRN_FSCR, r3 + // fall through + __init_FSCR: mfspr r3,SPRN_FSCR - ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB + ori r3,r3,FSCR_TAR|FSCR_EBB mtspr SPRN_FSCR,r3 blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 13eba2eb46fe..b4066354f073 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -70,6 +70,8 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power9(void); +extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_power10(void); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); @@ -119,6 +121,10 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE2_ARCH_3_00 | \ PPC_FEATURE2_HAS_IEEE128 | \ PPC_FEATURE2_DARN ) +#define COMMON_USER_POWER10 COMMON_USER_POWER9 +#define COMMON_USER2_POWER10 (COMMON_USER2_POWER9 | \ + PPC_FEATURE2_ARCH_3_1 | \ + PPC_FEATURE2_MMA) #ifdef CONFIG_PPC_BOOK3E_64 #define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) @@ -367,6 +373,22 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_power9, .platform = "power9", }, + { /* 3.1-compliant processor, i.e. Power10 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000006, + .cpu_name = "POWER10 (architected)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .oprofile_type = PPC_OPROFILE_INVALID, + .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .platform = "power10", + }, { /* Power7 */ .pvr_mask = 0xffff0000, .pvr_value = 0x003f0000, @@ -1232,69 +1254,6 @@ static struct cpu_spec __initdata cpu_specs[] = { }, #endif /* CONFIG_PPC_8xx */ #ifdef CONFIG_40x - { /* 403GC */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x00200200, - .cpu_name = "403GC", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 16, - .dcache_bsize = 16, - .machine_check = machine_check_4xx, - .platform = "ppc403", - }, - { /* 403GCX */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x00201400, - .cpu_name = "403GCX", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 16, - .dcache_bsize = 16, - .machine_check = machine_check_4xx, - .platform = "ppc403", - }, - { /* 403G ?? */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00200000, - .cpu_name = "403G ??", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 16, - .dcache_bsize = 16, - .machine_check = machine_check_4xx, - .platform = "ppc403", - }, - { /* 405GP */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x40110000, - .cpu_name = "405GP", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* STB 03xxx */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x40130000, - .cpu_name = "STB03xxx", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, { /* STB 04xxx */ .pvr_mask = 0xffff0000, .pvr_value = 0x41810000, @@ -1385,32 +1344,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_4xx, .platform = "ppc405", }, - { /* Xilinx Virtex-II Pro */ - .pvr_mask = 0xfffff000, - .pvr_value = 0x20010000, - .cpu_name = "Virtex-II Pro", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* Xilinx Virtex-4 FX */ - .pvr_mask = 0xfffff000, - .pvr_value = 0x20011000, - .cpu_name = "Virtex-4 FX", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, { /* 405EP */ .pvr_mask = 0xffff0000, .pvr_value = 0x51210000, @@ -1800,19 +1733,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, - { /* 440 in Xilinx Virtex-5 FXT */ - .pvr_mask = 0xfffffff0, - .pvr_value = 0x7ff21910, - .cpu_name = "440 in Virtex-5 FXT", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440x5, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, { /* 460EX */ .pvr_mask = 0xffff0006, .pvr_value = 0x13020002, diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 05745ddbd229..735e89337398 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -18,6 +18,7 @@ #include <asm/firmware.h> #include <linux/uaccess.h> #include <asm/rtas.h> +#include <asm/inst.h> #ifdef DEBUG #include <asm/udbg.h> @@ -34,7 +35,7 @@ void __init reserve_kdump_trampoline(void) static void __init create_trampoline(unsigned long addr) { - unsigned int *p = (unsigned int *)addr; + struct ppc_inst *p = (struct ppc_inst *)addr; /* The maximum range of a single instruction branch, is the current * instruction's address + (32 MB - 4) bytes. For the trampoline we @@ -44,8 +45,8 @@ static void __init create_trampoline(unsigned long addr) * branch to "addr" we jump to ("addr" + 32 MB). Although it requires * two instructions it doesn't require any registers. */ - patch_instruction(p, PPC_INST_NOP); - patch_branch(++p, addr + PHYSICAL_START, 0); + patch_instruction(p, ppc_inst(PPC_INST_NOP)); + patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); } void __init setup_kdump_trampoline(void) diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index cc14aa6c4a1b..500f52fa4711 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -16,7 +16,7 @@ bool dawr_force_enable; EXPORT_SYMBOL_GPL(dawr_force_enable); -int set_dawr(struct arch_hw_breakpoint *brk) +int set_dawr(int nr, struct arch_hw_breakpoint *brk) { unsigned long dawr, dawrx, mrd; @@ -39,15 +39,24 @@ int set_dawr(struct arch_hw_breakpoint *brk) if (ppc_md.set_dawr) return ppc_md.set_dawr(dawr, dawrx); - mtspr(SPRN_DAWR, dawr); - mtspr(SPRN_DAWRX, dawrx); + if (nr == 0) { + mtspr(SPRN_DAWR0, dawr); + mtspr(SPRN_DAWRX0, dawrx); + } else { + mtspr(SPRN_DAWR1, dawr); + mtspr(SPRN_DAWRX1, dawrx); + } return 0; } -static void set_dawr_cb(void *info) +static void disable_dawrs_cb(void *info) { - set_dawr(info); + struct arch_hw_breakpoint null_brk = {0}; + int i; + + for (i = 0; i < nr_wp_slots(); i++) + set_dawr(i, &null_brk); } static ssize_t dawr_write_file_bool(struct file *file, @@ -60,7 +69,7 @@ static ssize_t dawr_write_file_bool(struct file *file, /* Send error to user if they hypervisor won't allow us to write DAWR */ if (!dawr_force_enable && firmware_has_feature(FW_FEATURE_LPAR) && - set_dawr(&null_brk) != H_SUCCESS) + set_dawr(0, &null_brk) != H_SUCCESS) return -ENODEV; rc = debugfs_write_file_bool(file, user_buf, count, ppos); @@ -69,7 +78,7 @@ static ssize_t dawr_write_file_bool(struct file *file, /* If we are clearing, make sure all CPUs have the DAWR cleared */ if (!dawr_force_enable) - smp_call_function(set_dawr_cb, &null_brk, 0); + smp_call_function(disable_dawrs_cb, NULL, 0); return rc; } diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 36bc0d5c4f3a..3a409517c031 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -26,6 +26,7 @@ /* Device-tree visible constants follow */ #define ISA_V2_07B 2070 #define ISA_V3_0B 3000 +#define ISA_V3_1 3100 #define USABLE_PR (1U << 0) #define USABLE_OS (1U << 1) @@ -74,6 +75,7 @@ static struct { u64 lpcr_clear; u64 hfscr; u64 fscr; + u64 pcr; } system_registers; static void (*init_pmu_registers)(void); @@ -101,7 +103,7 @@ static void __restore_cpu_cpufeatures(void) if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); - mtspr(SPRN_PCR, PCR_MASK); + mtspr(SPRN_PCR, system_registers.pcr); } mtspr(SPRN_FSCR, system_registers.fscr); @@ -346,6 +348,14 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f) { u64 lpcr; + /* + * Linux relies on FSCR[DSCR] being clear, so that we can take the + * facility unavailable interrupt and track the task's usage of DSCR. + * See facility_unavailable_exception(). + * Clear the bit here so that feat_enable() doesn't set it. + */ + f->fscr_bit_nr = -1; + feat_enable(f); lpcr = mfspr(SPRN_LPCR); @@ -552,6 +562,18 @@ static int __init feat_enable_large_ci(struct dt_cpu_feature *f) return 1; } +static int __init feat_enable_mma(struct dt_cpu_feature *f) +{ + u64 pcr; + + feat_enable(f); + pcr = mfspr(SPRN_PCR); + pcr &= ~PCR_MMA_DIS; + mtspr(SPRN_PCR, pcr); + + return 1; +} + struct dt_cpu_feature_match { const char *name; int (*enable)(struct dt_cpu_feature *f); @@ -625,6 +647,8 @@ static struct dt_cpu_feature_match __initdata {"vector-binary128", feat_enable, 0}, {"vector-binary16", feat_enable, 0}, {"wait-v3", feat_enable, 0}, + {"prefix-instructions", feat_enable, 0}, + {"matrix-multiply-assist", feat_enable_mma, 0}, }; static bool __initdata using_dt_cpu_ftrs; @@ -654,6 +678,11 @@ static void __init cpufeatures_setup_start(u32 isa) cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300; cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00; } + + if (isa >= 3100) { + cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31; + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1; + } } static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) @@ -770,6 +799,7 @@ static void __init cpufeatures_setup_finished(void) system_registers.lpcr = mfspr(SPRN_LPCR); system_registers.hfscr = mfspr(SPRN_HFSCR); system_registers.fscr = mfspr(SPRN_FSCR); + system_registers.pcr = mfspr(SPRN_PCR); pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n", cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 7cdcb413bb44..d407981dec76 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1106,6 +1106,37 @@ static int eeh_init(void) core_initcall_sync(eeh_init); +static int eeh_device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + /* + * Note: It's not possible to perform EEH device addition (i.e. + * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on + * the device's resources, which have not yet been set up. + */ + case BUS_NOTIFY_DEL_DEVICE: + eeh_remove_device(to_pci_dev(dev)); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block eeh_device_nb = { + .notifier_call = eeh_device_notifier, +}; + +static __init int eeh_set_bus_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &eeh_device_nb); + return 0; +} +arch_initcall(eeh_set_bus_notifier); + /** * eeh_probe_device() - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8420abd4ea1c..217ebdf5b00b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -28,7 +28,6 @@ #include <asm/unistd.h> #include <asm/ptrace.h> #include <asm/export.h> -#include <asm/asm-405.h> #include <asm/feature-fixups.h> #include <asm/barrier.h> #include <asm/kup.h> @@ -51,6 +50,7 @@ mcheck_transfer_to_handler: mfspr r0,SPRN_DSRR1 stw r0,_DSRR1(r11) /* fall through */ +_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler) .globl debug_transfer_to_handler debug_transfer_to_handler: @@ -59,6 +59,7 @@ debug_transfer_to_handler: mfspr r0,SPRN_CSRR1 stw r0,_CSRR1(r11) /* fall through */ +_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler) .globl crit_transfer_to_handler crit_transfer_to_handler: @@ -94,6 +95,7 @@ crit_transfer_to_handler: rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) stw r0,KSP_LIMIT(r8) /* fall through */ +_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif #ifdef CONFIG_40x @@ -115,6 +117,7 @@ crit_transfer_to_handler: rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) stw r0,KSP_LIMIT(r8) /* fall through */ +_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif /* @@ -127,6 +130,7 @@ crit_transfer_to_handler: .globl transfer_to_handler_full transfer_to_handler_full: SAVE_NVGPRS(r11) +_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) /* fall through */ .globl transfer_to_handler @@ -227,6 +231,23 @@ transfer_to_handler_cont: SYNC RFI /* jump to handler, enable MMU */ +#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) +4: rlwinm r12,r12,0,~_TLF_NAPPING + stw r12,TI_LOCAL_FLAGS(r2) + b power_save_ppc32_restore + +7: rlwinm r12,r12,0,~_TLF_SLEEPING + stw r12,TI_LOCAL_FLAGS(r2) + lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ + rlwinm r9,r9,0,~MSR_EE + lwz r12,_LINK(r11) /* and return to address in LR */ + kuap_restore r11, r2, r3, r4, r5 + lwz r2, GPR2(r11) + b fast_exception_return +#endif +_ASM_NOKPROBE_SYMBOL(transfer_to_handler) +_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) + #ifdef CONFIG_TRACE_IRQFLAGS 1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to * keep interrupts disabled at this point otherwise we might risk @@ -272,21 +293,6 @@ reenable_mmu: bctr /* jump to handler */ #endif /* CONFIG_TRACE_IRQFLAGS */ -#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) -4: rlwinm r12,r12,0,~_TLF_NAPPING - stw r12,TI_LOCAL_FLAGS(r2) - b power_save_ppc32_restore - -7: rlwinm r12,r12,0,~_TLF_SLEEPING - stw r12,TI_LOCAL_FLAGS(r2) - lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ - rlwinm r9,r9,0,~MSR_EE - lwz r12,_LINK(r11) /* and return to address in LR */ - kuap_restore r11, r2, r3, r4, r5 - lwz r2, GPR2(r11) - b fast_exception_return -#endif - #ifndef CONFIG_VMAP_STACK /* * On kernel stack overflow, load up an initial stack pointer @@ -313,6 +319,7 @@ stack_ovf: mtspr SPRN_SRR1,r10 SYNC RFI +_ASM_NOKPROBE_SYMBOL(stack_ovf) #endif #ifdef CONFIG_TRACE_IRQFLAGS @@ -455,6 +462,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) lwz r7,_NIP(r1) lwz r2,GPR2(r1) lwz r1,GPR1(r1) +syscall_exit_finish: #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif @@ -462,6 +470,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) mtspr SPRN_SRR1,r8 SYNC RFI +_ASM_NOKPROBE_SYMBOL(syscall_exit_finish) #ifdef CONFIG_44x 2: li r7,0 iccci r0,r0 @@ -541,9 +550,6 @@ syscall_exit_work: addi r12,r2,TI_FLAGS 3: lwarx r8,0,r12 andc r8,r8,r11 -#ifdef CONFIG_IBM405_ERR77 - dcbt 0,r12 -#endif stwcx. r8,0,r12 bne- 3b @@ -596,6 +602,7 @@ ret_from_kernel_syscall: mtspr SPRN_SRR1, r10 SYNC RFI +_ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) /* * The fork/clone functions need to copy the full register set into @@ -799,6 +806,7 @@ fast_exception_return: lwz r11,GPR11(r11) SYNC RFI +_ASM_NOKPROBE_SYMBOL(fast_exception_return) #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) /* check if the exception happened in a restartable section */ @@ -918,9 +926,6 @@ resume_kernel: addi r5,r2,TI_FLAGS 0: lwarx r8,0,r5 andc r8,r8,r11 -#ifdef CONFIG_IBM405_ERR77 - dcbt 0,r5 -#endif stwcx. r8,0,r5 bne- 0b 1: @@ -997,7 +1002,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) mtspr SPRN_XER,r10 mtctr r11 - PPC405_ERR77(0,r1) BEGIN_FTR_SECTION lwarx r11,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) @@ -1038,6 +1042,8 @@ exc_exit_restart: exc_exit_restart_end: SYNC RFI +_ASM_NOKPROBE_SYMBOL(exc_exit_restart) +_ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) #else /* !(CONFIG_4xx || CONFIG_BOOKE) */ /* @@ -1059,16 +1065,15 @@ exc_exit_restart_end: exc_exit_restart: lwz r11,_NIP(r1) lwz r12,_MSR(r1) -exc_exit_start: mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 REST_2GPRS(11, r1) lwz r1,GPR1(r1) .globl exc_exit_restart_end exc_exit_restart_end: - PPC405_ERR77_SYNC rfi b . /* prevent prefetch past rfi */ +_ASM_NOKPROBE_SYMBOL(exc_exit_restart) /* * Returning from a critical interrupt in user mode doesn't need @@ -1109,7 +1114,6 @@ exc_exit_restart_end: lwz r11,_CTR(r1); \ mtspr SPRN_XER,r10; \ mtctr r11; \ - PPC405_ERR77(0,r1); \ stwcx. r0,0,r1; /* to clear the reservation */ \ lwz r11,_LINK(r1); \ mtlr r11; \ @@ -1129,7 +1133,6 @@ exc_exit_restart_end: lwz r10,GPR10(r1); \ lwz r11,GPR11(r1); \ lwz r1,GPR1(r1); \ - PPC405_ERR77_SYNC; \ exc_lvl_rfi; \ b .; /* prevent prefetch past exc_lvl_rfi */ @@ -1182,6 +1185,7 @@ ret_from_crit_exc: mtspr SPRN_SRR0,r9; mtspr SPRN_SRR1,r10; RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) +_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) #endif /* CONFIG_40x */ #ifdef CONFIG_BOOKE @@ -1193,6 +1197,7 @@ ret_from_crit_exc: RESTORE_xSRR(SRR0,SRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) +_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) .globl ret_from_debug_exc ret_from_debug_exc: @@ -1203,6 +1208,7 @@ ret_from_debug_exc: RESTORE_xSRR(CSRR0,CSRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI) +_ASM_NOKPROBE_SYMBOL(ret_from_debug_exc) .globl ret_from_mcheck_exc ret_from_mcheck_exc: @@ -1214,6 +1220,7 @@ ret_from_mcheck_exc: RESTORE_xSRR(DSRR0,DSRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) +_ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc) #endif /* CONFIG_BOOKE */ /* @@ -1337,6 +1344,7 @@ nonrecoverable: bl unrecoverable_exception /* shouldn't return */ b 4b +_ASM_NOKPROBE_SYMBOL(nonrecoverable) .section .bss .align 2 @@ -1391,10 +1399,5 @@ _GLOBAL(enter_rtas) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 RFI /* return to caller */ - - .globl machine_check_in_rtas -machine_check_in_rtas: - twi 31,0,0 - /* XXX load up BATs and panic */ - +_ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index b3c9f15089b6..9d49338e0c85 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -479,11 +479,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) fast_interrupt_return: _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) kuap_check_amr r3, r4 - ld r4,_MSR(r1) - andi. r0,r4,MSR_PR + ld r5,_MSR(r1) + andi. r0,r5,MSR_PR bne .Lfast_user_interrupt_return - kuap_restore_amr r3 - andi. r0,r4,MSR_RI + kuap_restore_amr r3, r4 + andi. r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 9d32158ce36f..2ed14d4a47f5 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -11,6 +11,7 @@ #include <asm/cacheflush.h> #include <asm/code-patching.h> #include <asm/machdep.h> +#include <asm/inst.h> #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) extern void epapr_ev_idle(void); @@ -36,10 +37,10 @@ static int __init early_init_dt_scan_epapr(unsigned long node, return -1; for (i = 0; i < (len / 4); i++) { - u32 inst = be32_to_cpu(insts[i]); - patch_instruction(epapr_hypercall_start + i, inst); + struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); + patch_instruction((struct ppc_inst *)(epapr_hypercall_start + i), inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) - patch_instruction(epapr_ev_idle_start + i, inst); + patch_instruction((struct ppc_inst *)(epapr_ev_idle_start + i), inst); #endif } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ebeebab74b56..e70ebb5c318c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -939,13 +939,13 @@ EXC_COMMON_BEGIN(system_reset_common) * the right thing. We do not want to reconcile because that goes * through irq tracing which we don't want in NMI. * - * Save PACAIRQHAPPENED to _DAR (otherwise unused), and set HARD_DIS + * Save PACAIRQHAPPENED to RESULT (otherwise unused), and set HARD_DIS * as we are running with MSR[EE]=0. */ li r10,IRQS_ALL_DISABLED stb r10,PACAIRQSOFTMASK(r13) lbz r10,PACAIRQHAPPENED(r13) - std r10,_DAR(r1) + std r10,RESULT(r1) ori r10,r10,PACA_IRQ_HARD_DIS stb r10,PACAIRQHAPPENED(r13) @@ -966,12 +966,12 @@ EXC_COMMON_BEGIN(system_reset_common) /* * Restore soft mask settings. */ - ld r10,_DAR(r1) + ld r10,RESULT(r1) stb r10,PACAIRQHAPPENED(r13) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r10 + kuap_restore_amr r9, r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL @@ -1117,11 +1117,30 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) li r10,MSR_RI mtmsrd r10,1 + /* + * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see + * system_reset_common) + */ + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + lbz r10,PACAIRQHAPPENED(r13) + std r10,RESULT(r1) + ori r10,r10,PACA_IRQ_HARD_DIS + stb r10,PACAIRQHAPPENED(r13) + addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) + /* + * Restore soft mask settings. + */ + ld r10,RESULT(r1) + stb r10,PACAIRQHAPPENED(r13) + ld r10,SOFTE(r1) + stb r10,PACAIRQSOFTMASK(r13) + #ifdef CONFIG_PPC_P7_NAP /* * Check if thread was in power saving mode. We come here when any @@ -1225,17 +1244,19 @@ EXC_COMMON_BEGIN(machine_check_idle_common) bl machine_check_queue_event /* - * We have not used any non-volatile GPRs here, and as a rule - * most exception code including machine check does not. - * Therefore PACA_NAPSTATELOST does not need to be set. Idle - * wakeup will restore volatile registers. + * GPR-loss wakeups are relatively straightforward, because the + * idle sleep code has saved all non-volatile registers on its + * own stack, and r1 in PACAR1. * - * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce. + * For no-loss wakeups the r1 and lr registers used by the + * early machine check handler have to be restored first. r2 is + * the kernel TOC, so no need to restore it. * * Then decrement MCE nesting after finishing with the stack. */ ld r3,_MSR(r1) ld r4,_LINK(r1) + ld r1,GPR1(r1) lhz r11,PACA_IN_MCE(r13) subi r11,r11,1 @@ -1244,7 +1265,7 @@ EXC_COMMON_BEGIN(machine_check_idle_common) mtlr r4 rlwinm r10,r3,47-31,30,31 cmpwi cr1,r10,2 - bltlr cr1 /* no state loss, return to idle caller */ + bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */ b idle_return_gpr_loss #endif @@ -1266,6 +1287,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) andc r10,r10,r3 mtmsrd r10 + lhz r12,PACA_IN_MCE(r13) + subi r12,r12,1 + sth r12,PACA_IN_MCE(r13) + /* Invoke machine_check_exception to print MCE event and panic. */ addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception @@ -2740,7 +2765,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) li r10,IRQS_ALL_DISABLED stb r10,PACAIRQSOFTMASK(r13) lbz r10,PACAIRQHAPPENED(r13) - std r10,_DAR(r1) + std r10,RESULT(r1) ori r10,r10,PACA_IRQ_HARD_DIS stb r10,PACAIRQHAPPENED(r13) @@ -2754,12 +2779,12 @@ EXC_COMMON_BEGIN(soft_nmi_common) /* * Restore soft mask settings. */ - ld r10,_DAR(r1) + ld r10,RESULT(r1) stb r10,PACAIRQHAPPENED(r13) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r10 + kuap_restore_amr r9, r10 EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 59e60a9a9f5c..78ab9a6ee6ac 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -40,8 +40,17 @@ struct kobject *fadump_kobj; #ifndef CONFIG_PRESERVE_FA_DUMP static DEFINE_MUTEX(fadump_mutex); -struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 }; -struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 }; +struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; + +#define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ +#define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ + sizeof(struct fadump_memory_range)) +static struct fadump_memory_range rngs[RESERVED_RNGS_CNT]; +struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs, + RESERVED_RNGS_SZ, 0, + RESERVED_RNGS_CNT, true }; + +static void __init early_init_dt_scan_reserved_ranges(unsigned long node); #ifdef CONFIG_CMA static struct cma *fadump_cma; @@ -110,6 +119,11 @@ static int __init fadump_cma_init(void) { return 1; } int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) { + if (depth == 0) { + early_init_dt_scan_reserved_ranges(node); + return 0; + } + if (depth != 1) return 0; @@ -431,10 +445,72 @@ static int __init fadump_get_boot_mem_regions(void) return ret; } +/* + * Returns true, if the given range overlaps with reserved memory ranges + * starting at idx. Also, updates idx to index of overlapping memory range + * with the given memory range. + * False, otherwise. + */ +static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx) +{ + bool ret = false; + int i; + + for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) { + u64 rbase = reserved_mrange_info.mem_ranges[i].base; + u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size; + + if (end <= rbase) + break; + + if ((end > rbase) && (base < rend)) { + *idx = i; + ret = true; + break; + } + } + + return ret; +} + +/* + * Locate a suitable memory area to reserve memory for FADump. While at it, + * lookup reserved-ranges & avoid overlap with them, as they are used by F/W. + */ +static u64 __init fadump_locate_reserve_mem(u64 base, u64 size) +{ + struct fadump_memory_range *mrngs; + phys_addr_t mstart, mend; + int idx = 0; + u64 i, ret = 0; + + mrngs = reserved_mrange_info.mem_ranges; + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + &mstart, &mend, NULL) { + pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n", + i, mstart, mend, base); + + if (mstart > base) + base = PAGE_ALIGN(mstart); + + while ((mend > base) && ((mend - base) >= size)) { + if (!overlaps_reserved_ranges(base, base+size, &idx)) { + ret = base; + goto out; + } + + base = mrngs[idx].base + mrngs[idx].size; + base = PAGE_ALIGN(base); + } + } + +out: + return ret; +} + int __init fadump_reserve_mem(void) { - u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE; - bool is_memblock_bottom_up = memblock_bottom_up(); + u64 base, size, mem_boundary, bootmem_min; int ret = 1; if (!fw_dump.fadump_enabled) @@ -455,9 +531,9 @@ int __init fadump_reserve_mem(void) PAGE_ALIGN(fadump_calculate_reserve_size()); #ifdef CONFIG_CMA if (!fw_dump.nocma) { - align = FADUMP_CMA_ALIGNMENT; fw_dump.boot_memory_size = - ALIGN(fw_dump.boot_memory_size, align); + ALIGN(fw_dump.boot_memory_size, + FADUMP_CMA_ALIGNMENT); } #endif @@ -525,13 +601,9 @@ int __init fadump_reserve_mem(void) * Reserve memory at an offset closer to bottom of the RAM to * minimize the impact of memory hot-remove operation. */ - memblock_set_bottom_up(true); - base = memblock_find_in_range(base, mem_boundary, size, align); + base = fadump_locate_reserve_mem(base, size); - /* Restore the previous allocation mode */ - memblock_set_bottom_up(is_memblock_bottom_up); - - if (!base) { + if (!base || (base + size > mem_boundary)) { pr_err("Failed to find memory chunk for reservation!\n"); goto error_out; } @@ -728,10 +800,14 @@ void fadump_free_cpu_notes_buf(void) static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info) { + if (mrange_info->is_static) { + mrange_info->mem_range_cnt = 0; + return; + } + kfree(mrange_info->mem_ranges); - mrange_info->mem_ranges = NULL; - mrange_info->mem_ranges_sz = 0; - mrange_info->max_mem_ranges = 0; + memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0, + (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ)); } /* @@ -788,6 +864,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) { int ret; + if (mrange_info->is_static) { + pr_err("Reached array size limit for %s memory ranges\n", + mrange_info->name); + return -ENOSPC; + } + ret = fadump_alloc_mem_ranges(mrange_info); if (ret) return ret; @@ -1204,20 +1286,19 @@ static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info) * Scan reserved-ranges to consider them while reserving/releasing * memory for FADump. */ -static inline int fadump_scan_reserved_mem_ranges(void) +static void __init early_init_dt_scan_reserved_ranges(unsigned long node) { - struct device_node *root; const __be32 *prop; int len, ret = -1; unsigned long i; - root = of_find_node_by_path("/"); - if (!root) - return ret; + /* reserved-ranges already scanned */ + if (reserved_mrange_info.mem_range_cnt != 0) + return; - prop = of_get_property(root, "reserved-ranges", &len); + prop = of_get_flat_dt_prop(node, "reserved-ranges", &len); if (!prop) - return ret; + return; /* * Each reserved range is an (address,size) pair, 2 cells each, @@ -1239,7 +1320,8 @@ static inline int fadump_scan_reserved_mem_ranges(void) } } - return ret; + /* Compact reserved ranges */ + sort_and_merge_mem_ranges(&reserved_mrange_info); } /* @@ -1253,32 +1335,21 @@ static void fadump_release_memory(u64 begin, u64 end) u64 ra_start, ra_end, tstart; int i, ret; - fadump_scan_reserved_mem_ranges(); - ra_start = fw_dump.reserve_dump_area_start; ra_end = ra_start + fw_dump.reserve_dump_area_size; /* - * Add reserved dump area to reserved ranges list - * and exclude all these ranges while releasing memory. + * If reserved ranges array limit is hit, overwrite the last reserved + * memory range with reserved dump area to ensure it is excluded from + * the memory being released (reused for next FADump registration). */ - ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end); - if (ret != 0) { - /* - * Not enough memory to setup reserved ranges but the system is - * running shortage of memory. So, release all the memory except - * Reserved dump area (reused for next fadump registration). - */ - if (begin < ra_end && end > ra_start) { - if (begin < ra_start) - fadump_release_reserved_area(begin, ra_start); - if (end > ra_end) - fadump_release_reserved_area(ra_end, end); - } else - fadump_release_reserved_area(begin, end); + if (reserved_mrange_info.mem_range_cnt == + reserved_mrange_info.max_mem_ranges) + reserved_mrange_info.mem_range_cnt--; + ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end); + if (ret != 0) return; - } /* Get the reserved ranges list in order first. */ sort_and_merge_mem_ranges(&reserved_mrange_info); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 3235a8da6af7..1dfccf58fbb1 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -119,6 +119,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* restore registers and return */ /* we haven't used ctr or xer or lr */ blr +_ASM_NOKPROBE_SYMBOL(load_up_fpu) /* * save_fpu(tsk) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 97c887950c3c..e2459550a3bf 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -297,7 +297,7 @@ MachineCheck: cmpwi cr1, r4, 0 #endif beq cr1, machine_check_tramp - b machine_check_in_rtas + twi 31, 0, 0 #else b machine_check_tramp #endif diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 2cec543c38f0..a22a8209971b 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -36,7 +36,6 @@ #include <asm/asm-offsets.h> #include <asm/ptrace.h> #include <asm/export.h> -#include <asm/asm-405.h> #include "head_32.h" @@ -176,135 +175,16 @@ _ENTRY(saved_ksp_limit) * 0x0300 - Data Storage Exception * This happens for just a few reasons. U0 set (but we don't do that), * or zone protection fault (user violation, write to protected page). - * If this is just an update of modified status, we do that quickly - * and exit. Otherwise, we call heavywight functions to do the work. + * The other Data TLB exceptions bail out to this point + * if they can't resolve the lightweight TLB fault. */ START_EXCEPTION(0x0300, DataStorage) - mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH1, r11 -#ifdef CONFIG_403GCX - stw r12, 0(r0) - stw r9, 4(r0) - mfcr r11 - mfspr r12, SPRN_PID - stw r11, 8(r0) - stw r12, 12(r0) -#else - mtspr SPRN_SPRG_SCRATCH3, r12 - mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r11 - mfspr r12, SPRN_PID - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH5, r12 -#endif - - /* First, check if it was a zone fault (which means a user - * tried to access a kernel or read-protected page - always - * a SEGV). All other faults here must be stores, so no - * need to check ESR_DST as well. */ - mfspr r10, SPRN_ESR - andis. r10, r10, ESR_DIZ@h - bne 2f - - mfspr r10, SPRN_DEAR /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - li r9, 0 - mtspr SPRN_PID, r9 /* TLB will have 0 TID */ - b 4f - - /* Get the PGD for the current thread. - */ -3: - mfspr r11,SPRN_SPRG_THREAD - lwz r11,PGDIR(r11) -4: - tophys(r11, r11) - rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r11, 0(r11) /* Get L1 entry */ - rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */ - beq 2f /* Bail if no table */ - - rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r12) /* Get Linux PTE */ - - andi. r9, r11, _PAGE_RW /* Is it writeable? */ - beq 2f /* Bail if not */ - - /* Update 'changed'. - */ - ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE - stw r11, 0(r12) /* Update Linux page table */ - - /* Most of the Linux PTE is ready to load into the TLB LO. - * We set ZSEL, where only the LS-bit determines user access. - * We set execute, because we don't have the granularity to - * properly set this at the page level (Linux problem). - * If shared is set, we cause a zero PID->TID load. - * Many of these bits are software only. Bits we don't set - * here we (properly should) assume have the appropriate value. - */ - li r12, 0x0ce2 - andc r11, r11, r12 /* Make sure 20, 21 are zero */ - - /* find the TLB index that caused the fault. It has to be here. - */ - tlbsx r9, 0, r10 - - tlbwe r11, r9, TLB_DATA /* Load TLB LO */ - - /* Done...restore registers and get out of here. - */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 -#endif - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 - PPC405_ERR77_SYNC - rfi /* Should sync shadow TLBs */ - b . /* prevent prefetch past rfi */ - -2: - /* The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. - */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 -#endif - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 - b DataAccess + EXCEPTION_PROLOG + mfspr r5, SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5, _ESR(r11) + mfspr r4, SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + stw r4, _DEAR(r11) + EXC_XFER_LITE(0x300, handle_page_fault) /* * 0x0400 - Instruction Storage Exception @@ -372,21 +252,11 @@ _ENTRY(saved_ksp_limit) START_EXCEPTION(0x1100, DTLBMiss) mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_SCRATCH1, r11 -#ifdef CONFIG_403GCX - stw r12, 0(r0) - stw r9, 4(r0) - mfcr r11 - mfspr r12, SPRN_PID - stw r11, 8(r0) - stw r12, 12(r0) -#else mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r11 - mfspr r12, SPRN_PID - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH5, r12 -#endif + mfcr r12 + mfspr r9, SPRN_PID + mtspr SPRN_SPRG_SCRATCH5, r9 mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -409,28 +279,34 @@ _ENTRY(saved_ksp_limit) 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r12, 0(r11) /* Get L1 entry */ - andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + lwz r11, 0(r11) /* Get L1 entry */ + andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ beq 2f /* Bail if no table */ - rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r12) /* Get Linux PTE */ - andi. r9, r11, _PAGE_PRESENT - beq 5f + rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r11) /* Get Linux PTE */ +#ifdef CONFIG_SWAP + li r9, _PAGE_PRESENT | _PAGE_ACCESSED +#else + li r9, _PAGE_PRESENT +#endif + andc. r9, r9, r11 /* Check permission */ + bne 5f - ori r11, r11, _PAGE_ACCESSED - stw r11, 0(r12) + rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ + and r9, r9, r11 /* hwwrite = dirty & rw */ + rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ /* Create TLB tag. This is the faulting address plus a static * set of bits. These are size, valid, E, U0. */ - li r12, 0x00c0 - rlwimi r10, r12, 0, 20, 31 + li r9, 0x00c0 + rlwimi r10, r9, 0, 20, 31 b finish_tlb_load 2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r12, 2, 22, 24 + rlwinm. r9, r11, 2, 22, 24 beq 5f /* Create TLB tag. This is the faulting address, plus a static @@ -438,7 +314,6 @@ _ENTRY(saved_ksp_limit) */ ori r9, r9, 0x40 rlwimi r10, r9, 0, 20, 31 - mr r11, r12 b finish_tlb_load @@ -446,24 +321,14 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH5 + mtspr SPRN_PID, r9 + mtcr r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 -#endif mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - b DataAccess + b DataStorage /* 0x1200 - Instruction TLB Miss Exception * Nearly the same as above, except we get our information from different @@ -472,21 +337,11 @@ _ENTRY(saved_ksp_limit) START_EXCEPTION(0x1200, ITLBMiss) mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_SCRATCH1, r11 -#ifdef CONFIG_403GCX - stw r12, 0(r0) - stw r9, 4(r0) - mfcr r11 - mfspr r12, SPRN_PID - stw r11, 8(r0) - stw r12, 12(r0) -#else mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r11 - mfspr r12, SPRN_PID - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH5, r12 -#endif + mfcr r12 + mfspr r9, SPRN_PID + mtspr SPRN_SPRG_SCRATCH5, r9 mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -509,28 +364,34 @@ _ENTRY(saved_ksp_limit) 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r12, 0(r11) /* Get L1 entry */ - andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + lwz r11, 0(r11) /* Get L1 entry */ + andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ beq 2f /* Bail if no table */ - rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r12) /* Get Linux PTE */ - andi. r9, r11, _PAGE_PRESENT - beq 5f + rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r11) /* Get Linux PTE */ +#ifdef CONFIG_SWAP + li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC +#else + li r9, _PAGE_PRESENT | _PAGE_EXEC +#endif + andc. r9, r9, r11 /* Check permission */ + bne 5f - ori r11, r11, _PAGE_ACCESSED - stw r11, 0(r12) + rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ + and r9, r9, r11 /* hwwrite = dirty & rw */ + rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ /* Create TLB tag. This is the faulting address plus a static * set of bits. These are size, valid, E, U0. */ - li r12, 0x00c0 - rlwimi r10, r12, 0, 20, 31 + li r9, 0x00c0 + rlwimi r10, r9, 0, 20, 31 b finish_tlb_load 2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r12, 2, 22, 24 + rlwinm. r9, r11, 2, 22, 24 beq 5f /* Create TLB tag. This is the faulting address, plus a static @@ -538,7 +399,6 @@ _ENTRY(saved_ksp_limit) */ ori r9, r9, 0x40 rlwimi r10, r9, 0, 20, 31 - mr r11, r12 b finish_tlb_load @@ -546,21 +406,11 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH5 + mtspr SPRN_PID, r9 + mtcr r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 -#endif mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 b InstructionAccess @@ -569,13 +419,7 @@ _ENTRY(saved_ksp_limit) EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) -#ifdef CONFIG_IBM405_ERR51 - /* 405GP errata 51 */ - START_EXCEPTION(0x1700, Trap_17) - b DTLBMiss -#else EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) -#endif EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD) @@ -636,7 +480,6 @@ _ENTRY(saved_ksp_limit) lwz r12,GPR12(r11) lwz r10,crit_r10@l(0) lwz r11,crit_r11@l(0) - PPC405_ERR77_SYNC rfci b . @@ -669,18 +512,6 @@ WDTException: (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) -/* - * The other Data TLB exceptions bail out to this point - * if they can't resolve the lightweight TLB fault. - */ -DataAccess: - EXCEPTION_PROLOG - mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ - stw r5,_ESR(r11) - mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ - stw r4, _DEAR(r11) - EXC_XFER_LITE(0x300, handle_page_fault) - /* Other PowerPC processors, namely those derived from the 6xx-series * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. * However, for the 4xx-series processors these are neither defined nor @@ -692,7 +523,7 @@ DataAccess: * miss get to this point to load the TLB. * r10 - TLB_TAG value * r11 - Linux PTE - * r12, r9 - available to use + * r9 - available to use * PID - loaded with proper value when we get here * Upon exit, we reload everything and RFI. * Actually, it will fit now, but oh well.....a common place @@ -701,45 +532,32 @@ DataAccess: tlb_4xx_index: .long 0 finish_tlb_load: - /* load the next available TLB index. - */ - lwz r9, tlb_4xx_index@l(0) - addi r9, r9, 1 - andi. r9, r9, (PPC40X_TLB_SIZE-1) - stw r9, tlb_4xx_index@l(0) - -6: /* * Clear out the software-only bits in the PTE to generate the * TLB_DATA value. These are the bottom 2 bits of the RPM, the * top 3 bits of the zone field, and M. */ - li r12, 0x0ce2 - andc r11, r11, r12 + li r9, 0x0ce2 + andc r11, r11, r9 + + /* load the next available TLB index. */ + lwz r9, tlb_4xx_index@l(0) + addi r9, r9, 1 + andi. r9, r9, PPC40X_TLB_SIZE - 1 + stw r9, tlb_4xx_index@l(0) tlbwe r11, r9, TLB_DATA /* Load TLB LO */ tlbwe r10, r9, TLB_TAG /* Load TLB HI */ /* Done...restore registers and get out of here. */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH5 + mtspr SPRN_PID, r9 + mtcr r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 -#endif mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - PPC405_ERR77_SYNC rfi /* Should sync shadow TLBs */ b . /* prevent prefetch past rfi */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ddfbd02140d9..0e05a9a47a4b 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -947,15 +947,8 @@ start_here_multiplatform: std r0,0(r4) #endif - /* The following gets the stack set up with the regs */ - /* pointing to the real addr of the kernel stack. This is */ - /* all done to support the C function call below which sets */ - /* up the htab. This is done because we have relocated the */ - /* kernel but are still running in real mode. */ - - LOAD_REG_ADDR(r3,init_thread_union) - /* set up a stack pointer */ + LOAD_REG_ADDR(r3,init_thread_union) LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) add r1,r3,r1 li r0,0 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 073a651787df..abb71fad7d6a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/magic.h> +#include <linux/sizes.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> @@ -31,10 +32,15 @@ #include "head_32.h" +.macro compare_to_kernel_boundary scratch, addr #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ -#define SIMPLE_KERNEL_ADDRESS 1 + not. \scratch, \addr +#else + rlwinm \scratch, \addr, 16, 0xfff8 + cmpli cr0, \scratch, PAGE_OFFSET@h #endif +.endm /* * We need an ITLB miss handler for kernel addresses if: @@ -196,7 +202,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mtspr SPRN_SPRG_SCRATCH1, r11 #endif @@ -206,44 +212,31 @@ InstructionTLBMiss: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10) mtspr SPRN_MD_EPN, r10 - /* Only modules will cause ITLB Misses as we always - * pin the first 8MB of kernel memory */ #ifdef ITLB_MISS_KERNEL mfcr r11 -#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) - cmpi cr0, r10, 0 /* Address >= 0x80000000 */ -#else - rlwinm r10, r10, 16, 0xfff8 - cmpli cr0, r10, PAGE_OFFSET@h -#ifndef CONFIG_PIN_TLB_TEXT - /* It is assumed that kernel code fits into the first 32M */ -0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h - patch_site 0b, patch__itlbmiss_linmem_top -#endif -#endif + compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ #ifdef ITLB_MISS_KERNEL -#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) - bge+ 3f -#else blt+ 3f -#endif -#ifndef CONFIG_PIN_TLB_TEXT - blt cr7, ITLBMissLinear -#endif rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha 3: + mtcr r11 #endif +#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ + mtspr SPRN_MD_TWC, r11 +#else lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MI_TWC, r10 /* Set segment attributes */ - mtspr SPRN_MD_TWC, r10 +#endif mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ -#ifdef ITLB_MISS_KERNEL - mtcr r11 +#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) + rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K + mtspr SPRN_MI_TWC, r11 #endif #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT @@ -263,7 +256,7 @@ InstructionTLBMiss: /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mfspr r11, SPRN_SPRG_SCRATCH1 #endif rfi @@ -281,33 +274,6 @@ InstructionTLBMiss: rfi #endif -#ifndef CONFIG_PIN_TLB_TEXT -ITLBMissLinear: - mtcr r11 -#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23 - patch_site 0f, patch__itlbmiss_linmem_top8 - - mfspr r10, SPRN_SRR0 -0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha - rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K - ori r11, r11, MI_PS512K | MI_SVALID - rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */ -#else - /* Set 8M byte page and mark it valid */ - li r11, MI_PS8MEG | MI_SVALID - rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ -#endif - mtspr SPRN_MI_TWC, r11 - ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT - mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ - -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 - rfi - patch_site 0b, patch__itlbmiss_exit_2 -#endif - . = 0x1200 DataStoreTLBMiss: mtspr SPRN_DAR, r10 @@ -318,21 +284,9 @@ DataStoreTLBMiss: * kernel page tables. */ mfspr r10, SPRN_MD_EPN - rlwinm r10, r10, 16, 0xfff8 - cmpli cr0, r10, PAGE_OFFSET@h -#ifndef CONFIG_PIN_TLB_IMMR - cmpli cr6, r10, VIRT_IMMR_BASE@h -#endif -0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h - patch_site 0b, patch__dtlbmiss_linmem_top - + compare_to_kernel_boundary r10, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table */ blt+ 3f -#ifndef CONFIG_PIN_TLB_IMMR -0: beq- cr6, DTLBMissIMMR - patch_site 0b, patch__dtlbmiss_immr_jmp -#endif - blt cr7, DTLBMissLinear rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha 3: @@ -350,6 +304,7 @@ DataStoreTLBMiss: * above. */ rlwimi r11, r10, 0, _PAGE_GUARDED + rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MD_TWC, r11 /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. @@ -383,61 +338,16 @@ DataStoreTLBMiss: rfi patch_site 0b, patch__dtlbmiss_exit_1 -DTLBMissIMMR: - mtcr r11 - /* Set 512k byte guarded page and mark it valid */ - li r10, MD_PS512K | MD_GUARDED | MD_SVALID - mtspr SPRN_MD_TWC, r10 - mfspr r10, SPRN_IMMR /* Get current IMMR */ - rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT | _PAGE_NO_CACHE - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - - li r11, RPN_PATTERN - -0: mfspr r10, SPRN_DAR +#ifdef CONFIG_PERF_EVENTS + patch_site 0f, patch__dtlbmiss_perf +0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + addi r10, r10, 1 + stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + mfspr r10, SPRN_DAR mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r11, SPRN_M_TW rfi - patch_site 0b, patch__dtlbmiss_exit_2 - -DTLBMissLinear: - mtcr r11 - rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ -#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_DATA_SHIFT < 23 - patch_site 0f, patch__dtlbmiss_romem_top8 - -0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha - rlwinm r11, r11, 0, 0xff800000 - neg r10, r11 - or r11, r11, r10 - rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K - ori r11, r11, MI_PS512K | MI_SVALID - mfspr r10, SPRN_MD_EPN - rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */ -#else - /* Set 8M byte page and mark it valid */ - li r11, MD_PS8MEG | MD_SVALID #endif - mtspr SPRN_MD_TWC, r11 -#ifdef CONFIG_STRICT_KERNEL_RWX - patch_site 0f, patch__dtlbmiss_romem_top - -0: subis r11, r10, 0 - rlwimi r10, r11, 11, _PAGE_RO -#endif - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - - li r11, RPN_PATTERN - -0: mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_M_TW - rfi - patch_site 0b, patch__dtlbmiss_exit_3 /* This is an instruction TLB error on the MPC8xx. This could be due * to many reasons, such as executing guarded memory or illegal instruction @@ -485,18 +395,6 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) -/* Called from DataStoreTLBMiss when perf TLB misses events are activated */ -#ifdef CONFIG_PERF_EVENTS - patch_site 0f, patch__dtlbmiss_perf -0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - addi r10, r10, 1 - stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_M_TW - rfi -#endif - stack_overflow: vmap_stack_overflow_exception @@ -563,14 +461,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ cmpli cr1, r11, PAGE_OFFSET@h mfspr r11, SPRN_M_TWB /* Get level 1 table */ blt+ cr1, 3f - rlwinm r11, r10, 16, 0xfff8 - -0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h - patch_site 0b, patch__fixupdar_linmem_top /* create physical page address from effective address */ tophys(r11, r10) - blt- cr7, 201f mfspr r11, SPRN_M_TWB /* Get level 1 table */ rlwinm r11, r11, 0, 20, 31 oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha @@ -581,7 +474,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r11, SPRN_MD_TWC lwz r11, 0(r11) /* Get the pte */ bt 28,200f /* bit 28 = Large page (8M) */ - bt 29,202f /* bit 29 = Large page (8M or 512K) */ /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 201: lwz r11,0(r11) @@ -608,11 +500,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 b 201b -202: - /* concat physical page address(r11) and page offset(r10) */ - rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31 - b 201b - 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 @@ -747,6 +634,31 @@ start_here: rfi /* Load up the kernel context */ 2: +#ifdef CONFIG_PIN_TLB_IMMR + lis r0, MD_TWAM@h + oris r0, r0, 0x1f00 + mtspr SPRN_MD_CTR, r0 + LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) + tlbie r0 + mtspr SPRN_MD_EPN, r0 + LOAD_REG_IMMEDIATE(r0, MD_SVALID | MD_PS512K | MD_GUARDED) + mtspr SPRN_MD_TWC, r0 + mfspr r0, SPRN_IMMR + rlwinm r0, r0, 0, 0xfff80000 + ori r0, r0, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ + _PAGE_NO_CACHE | _PAGE_PRESENT + mtspr SPRN_MD_RPN, r0 + lis r0, (MD_TWAM | MD_RSV4I)@h + mtspr SPRN_MD_CTR, r0 +#endif +#ifndef CONFIG_PIN_TLB_TEXT + li r0, 0 + mtspr SPRN_MI_CTR, r0 +#endif +#if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) + lis r0, MD_TWAM@h + mtspr SPRN_MD_CTR, r0 +#endif tlbia /* Clear all TLB entries */ sync /* wait for tlbia/tlbie to finish */ @@ -779,17 +691,10 @@ start_here: initial_mmu: li r8, 0 mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ - lis r10, MD_RESETVAL@h -#ifndef CONFIG_8xx_COPYBACK - oris r10, r10, MD_WTDEF@h -#endif + lis r10, MD_TWAM@h mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */ tlbia /* Invalidate all TLB entries */ -#ifdef CONFIG_PIN_TLB_DATA - oris r10, r10, MD_RSV4I@h - mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ -#endif lis r8, MI_APG_INIT@h /* Set protection modes */ ori r8, r8, MI_APG_INIT@l @@ -798,55 +703,32 @@ initial_mmu: ori r8, r8, MD_APG_INIT@l mtspr SPRN_MD_AP, r8 - /* Map a 512k page for the IMMR to get the processor - * internal registers (among other things). - */ -#ifdef CONFIG_PIN_TLB_IMMR - oris r10, r10, MD_RSV4I@h - ori r10, r10, 0x1c00 - mtspr SPRN_MD_CTR, r10 - - mfspr r9, 638 /* Get current IMMR */ - andis. r9, r9, 0xfff8 /* Get 512 kbytes boundary */ - - lis r8, VIRT_IMMR_BASE@h /* Create vaddr for TLB */ - ori r8, r8, MD_EVALID /* Mark it valid */ - mtspr SPRN_MD_EPN, r8 - li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ - ori r8, r8, MD_SVALID /* Make it valid */ - mtspr SPRN_MD_TWC, r8 - mr r8, r9 /* Create paddr for TLB */ - ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ - mtspr SPRN_MD_RPN, r8 -#endif - - /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */ -#ifdef CONFIG_PIN_TLB_TEXT + /* Map the lower RAM (up to 32 Mbytes) into the ITLB and DTLB */ lis r8, MI_RSV4I@h ori r8, r8, 0x1c00 -#endif + oris r12, r10, MD_RSV4I@h + ori r12, r12, 0x1c00 li r9, 4 /* up to 4 pages of 8M */ mtctr r9 lis r9, KERNELBASE@h /* Create vaddr for TLB */ li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */ li r11, MI_BOOTINIT /* Create RPN for address 0 */ - lis r12, _einittext@h - ori r12, r12, _einittext@l 1: -#ifdef CONFIG_PIN_TLB_TEXT mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ addi r8, r8, 0x100 -#endif - ori r0, r9, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r0 mtspr SPRN_MI_TWC, r10 mtspr SPRN_MI_RPN, r11 /* Store TLB entry */ + mtspr SPRN_MD_CTR, r12 + addi r12, r12, 0x100 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r10 + mtspr SPRN_MD_RPN, r11 addis r9, r9, 0x80 addis r11, r11, 0x80 - cmpl cr0, r9, r12 - bdnzf gt, 1b + bdnz 1b /* Since the cache is enabled according to the information we * just loaded into the TLB, invalidate and enable the caches here. @@ -857,17 +739,7 @@ initial_mmu: mtspr SPRN_DC_CST, r8 lis r8, IDC_ENABLE@h mtspr SPRN_IC_CST, r8 -#ifdef CONFIG_8xx_COPYBACK mtspr SPRN_DC_CST, r8 -#else - /* For a debug option, I left this here to easily enable - * the write through cache mode - */ - lis r8, DC_SFWT@h - mtspr SPRN_DC_CST, r8 - lis r8, IDC_ENABLE@h - mtspr SPRN_DC_CST, r8 -#endif /* Disable debug mode entry on breakpoints */ mfspr r8, SPRN_DER #ifdef CONFIG_PERF_EVENTS @@ -878,6 +750,108 @@ initial_mmu: mtspr SPRN_DER, r8 blr +#ifdef CONFIG_PIN_TLB +_GLOBAL(mmu_pin_tlb) + lis r9, (1f - PAGE_OFFSET)@h + ori r9, r9, (1f - PAGE_OFFSET)@l + mfmsr r10 + mflr r11 + li r12, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI) + rlwinm r0, r10, 0, ~MSR_RI + rlwinm r0, r0, 0, ~MSR_EE + mtmsr r0 + isync + .align 4 + mtspr SPRN_SRR0, r9 + mtspr SPRN_SRR1, r12 + rfi +1: + li r5, 0 + lis r6, MD_TWAM@h + mtspr SPRN_MI_CTR, r5 + mtspr SPRN_MD_CTR, r6 + tlbia + +#ifdef CONFIG_PIN_TLB_TEXT + LOAD_REG_IMMEDIATE(r5, 28 << 8) + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) + LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) + LOAD_REG_ADDR(r9, _sinittext) + li r0, 4 + mtctr r0 + +2: ori r0, r6, MI_EVALID + mtspr SPRN_MI_CTR, r5 + mtspr SPRN_MI_EPN, r0 + mtspr SPRN_MI_TWC, r7 + mtspr SPRN_MI_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r9 + bdnzt lt, 2b + lis r0, MI_RSV4I@h + mtspr SPRN_MI_CTR, r0 +#endif + LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) +#ifdef CONFIG_PIN_TLB_DATA + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) +#ifdef CONFIG_PIN_TLB_IMMR + li r0, 3 +#else + li r0, 4 +#endif + mtctr r0 + cmpwi r4, 0 + beq 4f + LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) + LOAD_REG_ADDR(r9, _sinittext) + +2: ori r0, r6, MD_EVALID + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r9 + bdnzt lt, 2b + +4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) +2: ori r0, r6, MD_EVALID + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r3 + bdnzt lt, 2b +#endif +#ifdef CONFIG_PIN_TLB_IMMR + LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) + LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED) + mfspr r8, SPRN_IMMR + rlwinm r8, r8, 0, 0xfff80000 + ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ + _PAGE_NO_CACHE | _PAGE_PRESENT + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 +#endif +#if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA) + lis r0, (MD_RSV4I | MD_TWAM)@h + mtspr SPRN_MI_CTR, r0 +#endif + mtspr SPRN_SRR1, r10 + mtspr SPRN_SRR0, r11 + rfi +#endif /* CONFIG_PIN_TLB */ /* * We put a few things here that have to be page-aligned. diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index bd2e5ed8dd50..18f87bf9e32b 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -534,7 +534,7 @@ struct exception_regs { }; /* ensure this structure is always sized to a multiple of the stack alignment */ -#define STACK_EXC_LVL_FRAME_SIZE _ALIGN_UP(sizeof (struct exception_regs), 16) +#define STACK_EXC_LVL_FRAME_SIZE ALIGN(sizeof (struct exception_regs), 16) #endif /* __ASSEMBLY__ */ #endif /* __HEAD_BOOKE_H__ */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 72f461bd70fb..0000daf0e1da 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -24,13 +24,14 @@ #include <asm/debug.h> #include <asm/debugfs.h> #include <asm/hvcall.h> +#include <asm/inst.h> #include <linux/uaccess.h> /* * Stores the breakpoints currently in use on each breakpoint address * register for every cpu */ -static DEFINE_PER_CPU(struct perf_event *, bp_per_reg); +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]); /* * Returns total number of data or instruction breakpoints available. @@ -38,10 +39,21 @@ static DEFINE_PER_CPU(struct perf_event *, bp_per_reg); int hw_breakpoint_slots(int type) { if (type == TYPE_DATA) - return HBP_NUM; + return nr_wp_slots(); return 0; /* no instruction breakpoints available */ } +static bool single_step_pending(void) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (current->thread.last_hit_ubp[i]) + return true; + } + return false; +} + /* * Install a perf counter breakpoint. * @@ -54,16 +66,26 @@ int hw_breakpoint_slots(int type) int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot = this_cpu_ptr(&bp_per_reg); + struct perf_event **slot; + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + slot = this_cpu_ptr(&bp_per_reg[i]); + if (!*slot) { + *slot = bp; + break; + } + } - *slot = bp; + if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot")) + return -EBUSY; /* * Do not install DABR values if the instruction must be single-stepped. * If so, DABR will be populated in single_step_dabr_instruction(). */ - if (current->thread.last_hit_ubp != bp) - __set_breakpoint(info); + if (!single_step_pending()) + __set_breakpoint(i, info); return 0; } @@ -79,15 +101,248 @@ int arch_install_hw_breakpoint(struct perf_event *bp) */ void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - struct perf_event **slot = this_cpu_ptr(&bp_per_reg); + struct arch_hw_breakpoint null_brk = {0}; + struct perf_event **slot; + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + slot = this_cpu_ptr(&bp_per_reg[i]); + if (*slot == bp) { + *slot = NULL; + break; + } + } - if (*slot != bp) { - WARN_ONCE(1, "Can't find the breakpoint"); + if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot")) return; + + __set_breakpoint(i, &null_brk); +} + +static bool is_ptrace_bp(struct perf_event *bp) +{ + return bp->overflow_handler == ptrace_triggered; +} + +struct breakpoint { + struct list_head list; + struct perf_event *bp; + bool ptrace_bp; +}; + +static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); +static LIST_HEAD(task_bps); + +static struct breakpoint *alloc_breakpoint(struct perf_event *bp) +{ + struct breakpoint *tmp; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ERR_PTR(-ENOMEM); + tmp->bp = bp; + tmp->ptrace_bp = is_ptrace_bp(bp); + return tmp; +} + +static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2) +{ + __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr; + + bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); + bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE); + bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); + bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE); + + return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr); +} + +static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp) +{ + return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp; +} + +static bool can_co_exist(struct breakpoint *b, struct perf_event *bp) +{ + return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp)); +} + +static int task_bps_add(struct perf_event *bp) +{ + struct breakpoint *tmp; + + tmp = alloc_breakpoint(bp); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + list_add(&tmp->list, &task_bps); + return 0; +} + +static void task_bps_remove(struct perf_event *bp) +{ + struct list_head *pos, *q; + + list_for_each_safe(pos, q, &task_bps) { + struct breakpoint *tmp = list_entry(pos, struct breakpoint, list); + + if (tmp->bp == bp) { + list_del(&tmp->list); + kfree(tmp); + break; + } } +} - *slot = NULL; - hw_breakpoint_disable(); +/* + * If any task has breakpoint from alternate infrastructure, + * return true. Otherwise return false. + */ +static bool all_task_bps_check(struct perf_event *bp) +{ + struct breakpoint *tmp; + + list_for_each_entry(tmp, &task_bps, list) { + if (!can_co_exist(tmp, bp)) + return true; + } + return false; +} + +/* + * If same task has breakpoint from alternate infrastructure, + * return true. Otherwise return false. + */ +static bool same_task_bps_check(struct perf_event *bp) +{ + struct breakpoint *tmp; + + list_for_each_entry(tmp, &task_bps, list) { + if (tmp->bp->hw.target == bp->hw.target && + !can_co_exist(tmp, bp)) + return true; + } + return false; +} + +static int cpu_bps_add(struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + struct breakpoint *tmp; + int i = 0; + + tmp = alloc_breakpoint(bp); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (!cpu_bp[i]) { + cpu_bp[i] = tmp; + break; + } + } + return 0; +} + +static void cpu_bps_remove(struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + int i = 0; + + cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (!cpu_bp[i]) + continue; + + if (cpu_bp[i]->bp == bp) { + kfree(cpu_bp[i]); + cpu_bp[i] = NULL; + break; + } + } +} + +static bool cpu_bps_check(int cpu, struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + int i; + + cpu_bp = per_cpu_ptr(cpu_bps, cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) + return true; + } + return false; +} + +static bool all_cpu_bps_check(struct perf_event *bp) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (cpu_bps_check(cpu, bp)) + return true; + } + return false; +} + +/* + * We don't use any locks to serialize accesses to cpu_bps or task_bps + * because are already inside nr_bp_mutex. + */ +int arch_reserve_bp_slot(struct perf_event *bp) +{ + int ret; + + /* ptrace breakpoint */ + if (is_ptrace_bp(bp)) { + if (all_cpu_bps_check(bp)) + return -ENOSPC; + + if (same_task_bps_check(bp)) + return -ENOSPC; + + return task_bps_add(bp); + } + + /* perf breakpoint */ + if (is_kernel_addr(bp->attr.bp_addr)) + return 0; + + if (bp->hw.target && bp->cpu == -1) { + if (same_task_bps_check(bp)) + return -ENOSPC; + + return task_bps_add(bp); + } else if (!bp->hw.target && bp->cpu != -1) { + if (all_task_bps_check(bp)) + return -ENOSPC; + + return cpu_bps_add(bp); + } + + if (same_task_bps_check(bp)) + return -ENOSPC; + + ret = cpu_bps_add(bp); + if (ret) + return ret; + ret = task_bps_add(bp); + if (ret) + cpu_bps_remove(bp); + + return ret; +} + +void arch_release_bp_slot(struct perf_event *bp) +{ + if (!is_kernel_addr(bp->attr.bp_addr)) { + if (bp->hw.target) + task_bps_remove(bp); + if (bp->cpu != -1) + cpu_bps_remove(bp); + } } /* @@ -102,8 +357,14 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp) * restoration variables to prevent dangling pointers. * FIXME, this should not be using bp->ctx at all! Sayeth peterz. */ - if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) - bp->ctx->task->thread.last_hit_ubp = NULL; + if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) { + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (bp->ctx->task->thread.last_hit_ubp[i] == bp) + bp->ctx->task->thread.last_hit_ubp[i] = NULL; + } + } } /* @@ -140,10 +401,10 @@ int arch_bp_generic_fields(int type, int *gen_bp_type) * <---8 bytes---> * * In this case, we should configure hw as: - * start_addr = address & ~HW_BREAKPOINT_ALIGN + * start_addr = address & ~(HW_BREAKPOINT_SIZE - 1) * len = 16 bytes * - * @start_addr and @end_addr are inclusive. + * @start_addr is inclusive but @end_addr is exclusive. */ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) { @@ -151,14 +412,14 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) u16 hw_len; unsigned long start_addr, end_addr; - start_addr = hw->address & ~HW_BREAKPOINT_ALIGN; - end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN; - hw_len = end_addr - start_addr + 1; + start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE); + end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE); + hw_len = end_addr - start_addr; if (dawr_enabled()) { max_len = DAWR_MAX_LEN; /* DAWR region can't cross 512 bytes boundary */ - if ((start_addr >> 9) != (end_addr >> 9)) + if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M)) return -EINVAL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { /* 8xx can setup a range without limitation */ @@ -215,90 +476,209 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) { struct arch_hw_breakpoint *info; + int i; - if (likely(!tsk->thread.last_hit_ubp)) - return; + for (i = 0; i < nr_wp_slots(); i++) { + if (unlikely(tsk->thread.last_hit_ubp[i])) + goto reset; + } + return; - info = counter_arch_bp(tsk->thread.last_hit_ubp); +reset: regs->msr &= ~MSR_SE; - __set_breakpoint(info); - tsk->thread.last_hit_ubp = NULL; + for (i = 0; i < nr_wp_slots(); i++) { + info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); + __set_breakpoint(i, info); + tsk->thread.last_hit_ubp[i] = NULL; + } } -static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info) +static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info) { return ((info->address <= dar) && (dar - info->address < info->len)); } -static bool -dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info) +static bool dar_user_range_overlaps(unsigned long dar, int size, + struct arch_hw_breakpoint *info) +{ + return ((dar < info->address + info->len) && + (dar + size > info->address)); +} + +static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) +{ + unsigned long hw_start_addr, hw_end_addr; + + hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); + + return ((hw_start_addr <= dar) && (hw_end_addr > dar)); +} + +static bool dar_hw_range_overlaps(unsigned long dar, int size, + struct arch_hw_breakpoint *info) { - return ((dar <= info->address + info->len - 1) && - (dar + size - 1 >= info->address)); + unsigned long hw_start_addr, hw_end_addr; + + hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); + + return ((dar < hw_end_addr) && (dar + size > hw_start_addr)); } /* - * Handle debug exception notifications. + * If hw has multiple DAWR registers, we also need to check all + * dawrx constraint bits to confirm this is _really_ a valid event. */ -static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp, - struct arch_hw_breakpoint *info) +static bool check_dawrx_constraints(struct pt_regs *regs, int type, + struct arch_hw_breakpoint *info) { - unsigned int instr = 0; - int ret, type, size; - struct instruction_op op; - unsigned long addr = info->address; + if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) + return false; - if (__get_user_inatomic(instr, (unsigned int *)regs->nip)) - goto fail; + if (OP_IS_STORE(type) && !(info->type & HW_BRK_TYPE_WRITE)) + return false; - ret = analyse_instr(&op, regs, instr); - type = GETTYPE(op.type); - size = GETSIZE(op.type); + if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) + return false; - if (!ret && (type == LARX || type == STCX)) { - printk_ratelimited("Breakpoint hit on instruction that can't be emulated." - " Breakpoint at 0x%lx will be disabled.\n", addr); - goto disable; - } + if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER)) + return false; + + return true; +} + +/* + * Return true if the event is valid wrt dawr configuration, + * including extraneous exception. Otherwise return false. + */ +static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, + int type, int size, struct arch_hw_breakpoint *info) +{ + bool in_user_range = dar_in_user_range(regs->dar, info); + bool dawrx_constraints; /* - * If it's extraneous event, we still need to emulate/single- - * step the instruction, but we don't generate an event. + * 8xx supports only one breakpoint and thus we can + * unconditionally return true. */ - if (size && !dar_range_overlaps(regs->dar, size, info)) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + if (IS_ENABLED(CONFIG_PPC_8xx)) { + if (!in_user_range) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } - /* Do not emulate user-space instructions, instead single-step them */ - if (user_mode(regs)) { - current->thread.last_hit_ubp = bp; - regs->msr |= MSR_SE; + if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { + if (in_user_range) + return true; + + if (dar_in_hw_range(regs->dar, info)) { + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } return false; } - if (!emulate_step(regs, instr)) - goto fail; + dawrx_constraints = check_dawrx_constraints(regs, type, info); - return true; + if (dar_user_range_overlaps(regs->dar, size, info)) + return dawrx_constraints; + + if (dar_hw_range_overlaps(regs->dar, size, info)) { + if (dawrx_constraints) { + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } + } + return false; +} + +static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, + int *type, int *size, bool *larx_stcx) +{ + struct instruction_op op; + + if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) + return; + + analyse_instr(&op, regs, *instr); -fail: /* - * We've failed in reliably handling the hw-breakpoint. Unregister - * it and throw a warning message to let the user know about it. + * Set size = 8 if analyse_instr() fails. If it's a userspace + * watchpoint(valid or extraneous), we can notify user about it. + * If it's a kernel watchpoint, instruction emulation will fail + * in stepping_handler() and watchpoint will be disabled. */ - WARN(1, "Unable to handle hardware breakpoint. Breakpoint at " - "0x%lx will be disabled.", addr); + *type = GETTYPE(op.type); + *size = !(*type == UNKNOWN) ? GETSIZE(op.type) : 8; + *larx_stcx = (*type == LARX || *type == STCX); +} -disable: +/* + * We've failed in reliably handling the hw-breakpoint. Unregister + * it and throw a warning message to let the user know about it. + */ +static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info) +{ + WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.", + info->address); perf_event_disable_inatomic(bp); - return false; +} + +static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info) +{ + printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n", + info->address); + perf_event_disable_inatomic(bp); +} + +static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, + struct arch_hw_breakpoint **info, int *hit, + struct ppc_inst instr) +{ + int i; + int stepped; + + /* Do not emulate user-space instructions, instead single-step them */ + if (user_mode(regs)) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + current->thread.last_hit_ubp[i] = bp[i]; + info[i] = NULL; + } + regs->msr |= MSR_SE; + return false; + } + + stepped = emulate_step(regs, instr); + if (!stepped) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + handler_error(bp[i], info[i]); + info[i] = NULL; + } + return false; + } + return true; } int hw_breakpoint_handler(struct die_args *args) { + bool err = false; int rc = NOTIFY_STOP; - struct perf_event *bp; + struct perf_event *bp[HBP_NUM_MAX] = { NULL }; struct pt_regs *regs = args->regs; - struct arch_hw_breakpoint *info; + struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL }; + int i; + int hit[HBP_NUM_MAX] = {0}; + int nr_hit = 0; + bool ptrace_bp = false; + struct ppc_inst instr = ppc_inst(0); + int type = 0; + int size = 0; + bool larx_stcx = false; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); @@ -311,12 +691,40 @@ int hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = __this_cpu_read(bp_per_reg); - if (!bp) { + if (!IS_ENABLED(CONFIG_PPC_8xx)) + get_instr_detail(regs, &instr, &type, &size, &larx_stcx); + + for (i = 0; i < nr_wp_slots(); i++) { + bp[i] = __this_cpu_read(bp_per_reg[i]); + if (!bp[i]) + continue; + + info[i] = counter_arch_bp(bp[i]); + info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; + + if (check_constraints(regs, instr, type, size, info[i])) { + if (!IS_ENABLED(CONFIG_PPC_8xx) && + ppc_inst_equal(instr, ppc_inst(0))) { + handler_error(bp[i], info[i]); + info[i] = NULL; + err = 1; + continue; + } + + if (is_ptrace_bp(bp[i])) + ptrace_bp = true; + hit[i] = 1; + nr_hit++; + } + } + + if (err) + goto reset; + + if (!nr_hit) { rc = NOTIFY_DONE; goto out; } - info = counter_arch_bp(bp); /* * Return early after invoking user-callback function without restoring @@ -324,29 +732,50 @@ int hw_breakpoint_handler(struct die_args *args) * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal * generated in do_dabr(). */ - if (bp->overflow_handler == ptrace_triggered) { - perf_bp_event(bp, regs); + if (ptrace_bp) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + perf_bp_event(bp[i], regs); + info[i] = NULL; + } rc = NOTIFY_DONE; - goto out; + goto reset; } - info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (IS_ENABLED(CONFIG_PPC_8xx)) { - if (!dar_within_range(regs->dar, info)) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - } else { - if (!stepping_handler(regs, bp, info)) - goto out; + if (!IS_ENABLED(CONFIG_PPC_8xx)) { + if (larx_stcx) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + larx_stcx_err(bp[i], info[i]); + info[i] = NULL; + } + goto reset; + } + + if (!stepping_handler(regs, bp, info, hit, instr)) + goto reset; } /* * As a policy, the callback is invoked in a 'trigger-after-execute' * fashion */ - if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) - perf_bp_event(bp, regs); + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) + perf_bp_event(bp[i], regs); + } + +reset: + for (i = 0; i < nr_wp_slots(); i++) { + if (!info[i]) + continue; + __set_breakpoint(i, info[i]); + } - __set_breakpoint(info); out: rcu_read_unlock(); return rc; @@ -361,26 +790,43 @@ static int single_step_dabr_instruction(struct die_args *args) struct pt_regs *regs = args->regs; struct perf_event *bp = NULL; struct arch_hw_breakpoint *info; + int i; + bool found = false; - bp = current->thread.last_hit_ubp; /* * Check if we are single-stepping as a result of a * previous HW Breakpoint exception */ - if (!bp) - return NOTIFY_DONE; + for (i = 0; i < nr_wp_slots(); i++) { + bp = current->thread.last_hit_ubp[i]; + + if (!bp) + continue; + + found = true; + info = counter_arch_bp(bp); + + /* + * We shall invoke the user-defined callback function in the + * single stepping handler to confirm to 'trigger-after-execute' + * semantics + */ + if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) + perf_bp_event(bp, regs); + current->thread.last_hit_ubp[i] = NULL; + } - info = counter_arch_bp(bp); + if (!found) + return NOTIFY_DONE; - /* - * We shall invoke the user-defined callback function in the single - * stepping handler to confirm to 'trigger-after-execute' semantics - */ - if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) - perf_bp_event(bp, regs); + for (i = 0; i < nr_wp_slots(); i++) { + bp = __this_cpu_read(bp_per_reg[i]); + if (!bp) + continue; - __set_breakpoint(info); - current->thread.last_hit_ubp = NULL; + info = counter_arch_bp(bp); + __set_breakpoint(i, info); + } /* * If the process was being single-stepped by ptrace, let the @@ -419,10 +865,13 @@ NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify); */ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { + int i; struct thread_struct *t = &tsk->thread; - unregister_hw_breakpoint(t->ptrace_bps[0]); - t->ptrace_bps[0] = NULL; + for (i = 0; i < nr_wp_slots(); i++) { + unregister_hw_breakpoint(t->ptrace_bps[i]); + t->ptrace_bps[i] = NULL; + } } void hw_breakpoint_pmu_read(struct perf_event *bp) diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 433d97bea1f3..69df840f7253 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -187,6 +187,7 @@ BEGIN_FTR_SECTION mtspr SPRN_HID1, r9 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) b transfer_to_handler_cont +_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) .data diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 308f499e146c..72c85b6f3898 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -90,3 +90,4 @@ _GLOBAL(power_save_ppc32_restore) #endif b transfer_to_handler_cont +_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index ca37702bde97..144858027fa3 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -6,14 +6,15 @@ #include <linux/kernel.h> #include <linux/jump_label.h> #include <asm/code-patching.h> +#include <asm/inst.h> void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - u32 *addr = (u32 *)(unsigned long)entry->code; + struct ppc_inst *addr = (struct ppc_inst *)(unsigned long)entry->code; if (type == JUMP_LABEL_JMP) patch_branch(addr, entry->target, 0); else - patch_instruction(addr, PPC_INST_NOP); + patch_instruction(addr, ppc_inst(PPC_INST_NOP)); } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7dd55eb1259d..652b2852bea3 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -26,6 +26,7 @@ #include <asm/debug.h> #include <asm/code-patching.h> #include <linux/slab.h> +#include <asm/inst.h> /* * This table contains the mapping between PowerPC hardware trap types, and @@ -418,13 +419,13 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; unsigned int instr; - unsigned int *addr = (unsigned int *)bpt->bpt_addr; + struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; err = probe_kernel_address(addr, instr); if (err) return err; - err = patch_instruction(addr, BREAK_INSTR); + err = patch_instruction(addr, ppc_inst(BREAK_INSTR)); if (err) return -EFAULT; @@ -437,9 +438,9 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { int err; unsigned int instr = *(unsigned int *)bpt->saved_instr; - unsigned int *addr = (unsigned int *)bpt->bpt_addr; + struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = patch_instruction(addr, instr); + err = patch_instruction(addr, ppc_inst(instr)); if (err) return -EFAULT; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 81efb605113e..6f96f65ebfe8 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -23,6 +23,7 @@ #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> +#include <asm/inst.h> #include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -105,7 +106,9 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; - kprobe_opcode_t insn = *p->addr; + struct kprobe *prev; + struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); + struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1)); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -113,6 +116,17 @@ int arch_prepare_kprobe(struct kprobe *p) } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); ret = -EINVAL; + } else if (ppc_inst_prefixed(prefix)) { + printk("Cannot register a kprobe on the second word of prefixed instruction\n"); + ret = -EINVAL; + } + preempt_disable(); + prev = get_kprobe(p->addr - 1); + preempt_enable_no_resched(); + if (prev && + ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)prev->ainsn.insn))) { + printk("Cannot register a kprobe on the second word of prefixed instruction\n"); + ret = -EINVAL; } /* insn must be on a special executable page on ppc64. This is @@ -124,11 +138,8 @@ int arch_prepare_kprobe(struct kprobe *p) } if (!ret) { - memcpy(p->ainsn.insn, p->addr, - MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - p->opcode = *p->addr; - flush_icache_range((unsigned long)p->ainsn.insn, - (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); + patch_instruction((struct ppc_inst *)p->ainsn.insn, insn); + p->opcode = ppc_inst_val(insn); } p->ainsn.boostable = 0; @@ -138,13 +149,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); void arch_arm_kprobe(struct kprobe *p) { - patch_instruction(p->addr, BREAKPOINT_INSTRUCTION); + patch_instruction((struct ppc_inst *)p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)); } NOKPROBE_SYMBOL(arch_arm_kprobe); void arch_disarm_kprobe(struct kprobe *p) { - patch_instruction(p->addr, p->opcode); + patch_instruction((struct ppc_inst *)p->addr, ppc_inst(p->opcode)); } NOKPROBE_SYMBOL(arch_disarm_kprobe); @@ -216,7 +227,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; - unsigned int insn = *p->ainsn.insn; + struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); /* regs->nip is also adjusted if emulate_step returns 1 */ ret = emulate_step(regs, insn); @@ -233,7 +244,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) * So, we should never get here... but, its still * good to catch them, just in case... */ - printk("Can't step on instruction %x\n", insn); + printk("Can't step on instruction %x\n", ppc_inst_val(insn)); BUG(); } else { /* @@ -276,14 +287,18 @@ int kprobe_handler(struct pt_regs *regs) p = get_kprobe(addr); if (!p) { - if (*addr != BREAKPOINT_INSTRUCTION) { + unsigned int instr; + + if (probe_kernel_address(addr, instr)) + goto no_kprobe; + + if (instr != BREAKPOINT_INSTRUCTION) { /* * PowerPC has multiple variants of the "trap" * instruction. If the current instruction is a * trap variant, it could belong to someone else */ - kprobe_opcode_t cur_insn = *addr; - if (is_trap(cur_insn)) + if (is_trap(instr)) goto no_kprobe; /* * The breakpoint instruction was removed right @@ -464,14 +479,16 @@ NOKPROBE_SYMBOL(trampoline_probe_handler); */ int kprobe_post_handler(struct pt_regs *regs) { + int len; struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); if (!cur || user_mode(regs)) return 0; + len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn)); /* make sure we got here for instruction we have a kprobe on */ - if (((unsigned long)cur->ainsn.insn + 4) != regs->nip) + if (((unsigned long)cur->ainsn.insn + len) != regs->nip) return 0; if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { @@ -480,7 +497,7 @@ int kprobe_post_handler(struct pt_regs *regs) } /* Adjust nip to after the single-stepped instruction */ - regs->nip = (unsigned long)cur->addr + 4; + regs->nip = (unsigned long)cur->addr + len; regs->msr |= kcb->kprobe_saved_msr; /*Restore back the original saved kprobes variables and continue. */ diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 2020d255585f..5f07aa5e9851 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -455,5 +455,6 @@ _GLOBAL(__inval_enable_L1) sync blr +_ASM_NOKPROBE_SYMBOL(__inval_enable_L1) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 8077b5fb18a7..fd90c0eda229 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -16,6 +16,7 @@ #include <linux/export.h> #include <linux/irq_work.h> #include <linux/extable.h> +#include <linux/ftrace.h> #include <asm/machdep.h> #include <asm/mce.h> @@ -571,9 +572,16 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); * * regs->nip and regs->msr contains srr0 and ssr1. */ -long machine_check_early(struct pt_regs *regs) +long notrace machine_check_early(struct pt_regs *regs) { long handled = 0; + bool nested = in_nmi(); + u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); + + this_cpu_set_ftrace_enabled(0); + + if (!nested) + nmi_enter(); hv_nmi_check_nonrecoverable(regs); @@ -582,6 +590,12 @@ long machine_check_early(struct pt_regs *regs) */ if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); + + if (!nested) + nmi_exit(); + + this_cpu_set_ftrace_enabled(ftrace_enabled); + return handled; } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 067b094bfeff..c32af49a5138 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -20,6 +20,7 @@ #include <asm/sstep.h> #include <asm/exception-64s.h> #include <asm/extable.h> +#include <asm/inst.h> /* * Convert an address related to an mm to a PFN. NOTE: we are in real @@ -27,7 +28,7 @@ */ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) { - pte_t *ptep; + pte_t *ptep, pte; unsigned int shift; unsigned long pfn, flags; struct mm_struct *mm; @@ -39,19 +40,23 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) local_irq_save(flags); ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift); + if (!ptep) { + pfn = ULONG_MAX; + goto out; + } + pte = READ_ONCE(*ptep); - if (!ptep || pte_special(*ptep)) { + if (!pte_present(pte) || pte_special(pte)) { pfn = ULONG_MAX; goto out; } if (shift <= PAGE_SHIFT) - pfn = pte_pfn(*ptep); + pfn = pte_pfn(pte); else { unsigned long rpnmask = (1ul << shift) - PAGE_SIZE; - pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask))); + pfn = pte_pfn(__pte(pte_val(pte) | (addr & rpnmask))); } - out: local_irq_restore(flags); return pfn; @@ -365,7 +370,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, * in real-mode is tricky and can lead to recursive * faults */ - int instr; + struct ppc_inst instr; unsigned long pfn, instr_addr; struct instruction_op op; struct pt_regs tmp = *regs; @@ -373,7 +378,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, pfn = addr_to_pfn(regs, regs->nip); if (pfn != ULONG_MAX) { instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK); - instr = *(unsigned int *)(instr_addr); + instr = ppc_inst_read((struct ppc_inst *)instr_addr); if (!analyse_instr(&op, &tmp, instr)) { pfn = addr_to_pfn(regs, op.ea); *addr = op.ea; diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 65f9f731c229..5be96feccb55 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -36,6 +36,8 @@ _GLOBAL(add_reloc_offset) add r3,r3,r5 mtlr r0 blr +_ASM_NOKPROBE_SYMBOL(reloc_offset) +_ASM_NOKPROBE_SYMBOL(add_reloc_offset) .align 3 2: PPC_LONG 1b diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index d80212be8698..b24f866fef81 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -246,6 +246,7 @@ _GLOBAL(real_readb) sync isync blr +_ASM_NOKPROBE_SYMBOL(real_readb) /* * Do an IO access in real mode @@ -263,6 +264,7 @@ _GLOBAL(real_writeb) sync isync blr +_ASM_NOKPROBE_SYMBOL(real_writeb) #endif /* CONFIG_40x */ @@ -274,17 +276,8 @@ _GLOBAL(real_writeb) #ifndef CONFIG_PPC_8xx _GLOBAL(flush_instruction_cache) #if defined(CONFIG_4xx) -#ifdef CONFIG_403GCX - li r3, 512 - mtctr r3 - lis r4, KERNELBASE@h -1: iccci 0, r4 - addi r4, r4, 16 - bdnz 1b -#else lis r3, KERNELBASE@h iccci 0,r3 -#endif #elif defined(CONFIG_FSL_BOOKE) #ifdef CONFIG_E200 mfspr r3,SPRN_L1CSR0 diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index d7134c614c16..c27b8687b82a 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -67,21 +67,6 @@ static int relacmp(const void *_x, const void *_y) return 0; } -static void relaswap(void *_x, void *_y, int size) -{ - uint32_t *x, *y, tmp; - int i; - - y = (uint32_t *)_x; - x = (uint32_t *)_y; - - for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) { - tmp = x[i]; - x[i] = y[i]; - y[i] = tmp; - } -} - /* Get the potential trampolines size required of the init and non-init sections */ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, @@ -118,7 +103,7 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, */ sort((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf32_Rela), - sizeof(Elf32_Rela), relacmp, relaswap); + sizeof(Elf32_Rela), relacmp, NULL); ret += count_relocs((void *)hdr + sechdrs[i].sh_offset, diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 007606a48fd9..f4c2fa190192 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -20,6 +20,7 @@ #include <linux/sort.h> #include <asm/setup.h> #include <asm/sections.h> +#include <asm/inst.h> /* FIXME: We don't do .init separately. To do this, we'd need to have a separate r2 value in the init and core section, and stub between @@ -144,42 +145,6 @@ static u32 ppc64_stub_insns[] = { PPC_INST_BCTR, }; -#ifdef CONFIG_DYNAMIC_FTRACE -int module_trampoline_target(struct module *mod, unsigned long addr, - unsigned long *target) -{ - struct ppc64_stub_entry *stub; - func_desc_t funcdata; - u32 magic; - - if (!within_module_core(addr, mod)) { - pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - stub = (struct ppc64_stub_entry *)addr; - - if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { - pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - if (magic != STUB_MAGIC) { - pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { - pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - *target = stub_func_addr(funcdata); - - return 0; -} -#endif - /* Count how many different 24-bit relocations (different symbol, different addend) */ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) @@ -226,21 +191,6 @@ static int relacmp(const void *_x, const void *_y) return 0; } -static void relaswap(void *_x, void *_y, int size) -{ - uint64_t *x, *y, tmp; - int i; - - y = (uint64_t *)_x; - x = (uint64_t *)_y; - - for (i = 0; i < sizeof(Elf64_Rela) / sizeof(uint64_t); i++) { - tmp = x[i]; - x[i] = y[i]; - y[i] = tmp; - } -} - /* Get size of potential trampolines required. */ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, const Elf64_Shdr *sechdrs) @@ -264,7 +214,7 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, */ sort((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size / sizeof(Elf64_Rela), - sizeof(Elf64_Rela), relacmp, relaswap); + sizeof(Elf64_Rela), relacmp, NULL); relocs += count_relocs((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size @@ -384,6 +334,92 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, return 0; } +#ifdef CONFIG_MPROFILE_KERNEL + +#define PACATOC offsetof(struct paca_struct, kernel_toc) + +/* + * ld r12,PACATOC(r13) + * addis r12,r12,<high> + * addi r12,r12,<low> + * mtctr r12 + * bctr + */ +static u32 stub_insns[] = { + PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, + PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), + PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), + PPC_INST_MTCTR | __PPC_RS(R12), + PPC_INST_BCTR, +}; + +/* + * For mprofile-kernel we use a special stub for ftrace_caller() because we + * can't rely on r2 containing this module's TOC when we enter the stub. + * + * That can happen if the function calling us didn't need to use the toc. In + * that case it won't have setup r2, and the r2 value will be either the + * kernel's toc, or possibly another modules toc. + * + * To deal with that this stub uses the kernel toc, which is always accessible + * via the paca (in r13). The target (ftrace_caller()) is responsible for + * saving and restoring the toc before returning. + */ +static inline int create_ftrace_stub(struct ppc64_stub_entry *entry, + unsigned long addr, + struct module *me) +{ + long reladdr; + + memcpy(entry->jump, stub_insns, sizeof(stub_insns)); + + /* Stub uses address relative to kernel toc (from the paca) */ + reladdr = addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address of %ps out of range of kernel_toc.\n", + me->name, (void *)addr); + return 0; + } + + entry->jump[1] |= PPC_HA(reladdr); + entry->jump[2] |= PPC_LO(reladdr); + + /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */ + entry->funcdata = func_desc(addr); + entry->magic = STUB_MAGIC; + + return 1; +} + +static bool is_mprofile_ftrace_call(const char *name) +{ + if (!strcmp("_mcount", name)) + return true; +#ifdef CONFIG_DYNAMIC_FTRACE + if (!strcmp("ftrace_caller", name)) + return true; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (!strcmp("ftrace_regs_caller", name)) + return true; +#endif +#endif + + return false; +} +#else +static inline int create_ftrace_stub(struct ppc64_stub_entry *entry, + unsigned long addr, + struct module *me) +{ + return 0; +} + +static bool is_mprofile_ftrace_call(const char *name) +{ + return false; +} +#endif + /* * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the * value maximum span in an instruction which uses a signed offset). Round down @@ -399,10 +435,14 @@ static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) static inline int create_stub(const Elf64_Shdr *sechdrs, struct ppc64_stub_entry *entry, unsigned long addr, - struct module *me) + struct module *me, + const char *name) { long reladdr; + if (is_mprofile_ftrace_call(name)) + return create_ftrace_stub(entry, addr, me); + memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns)); /* Stub uses address relative to r2. */ @@ -426,7 +466,8 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, stub to set up the TOC ptr (r2) for the function. */ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, unsigned long addr, - struct module *me) + struct module *me, + const char *name) { struct ppc64_stub_entry *stubs; unsigned int i, num_stubs; @@ -443,62 +484,19 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, return (unsigned long)&stubs[i]; } - if (!create_stub(sechdrs, &stubs[i], addr, me)) + if (!create_stub(sechdrs, &stubs[i], addr, me, name)) return 0; return (unsigned long)&stubs[i]; } -#ifdef CONFIG_MPROFILE_KERNEL -static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction) -{ - if (strcmp("_mcount", name)) - return false; - - /* - * Check if this is one of the -mprofile-kernel sequences. - */ - if (instruction[-1] == PPC_INST_STD_LR && - instruction[-2] == PPC_INST_MFLR) - return true; - - if (instruction[-1] == PPC_INST_MFLR) - return true; - - return false; -} - -/* - * In case of _mcount calls, do not save the current callee's TOC (in r2) into - * the original caller's stack frame. If we did we would clobber the saved TOC - * value of the original caller. - */ -static void squash_toc_save_inst(const char *name, unsigned long addr) -{ - struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr; - - /* Only for calls to _mcount */ - if (strcmp("_mcount", name) != 0) - return; - - stub->jump[2] = PPC_INST_NOP; -} -#else -static void squash_toc_save_inst(const char *name, unsigned long addr) { } - -static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction) -{ - return false; -} -#endif - /* We expect a noop next: if it is, replace it with instruction to restore r2. */ static int restore_r2(const char *name, u32 *instruction, struct module *me) { u32 *prev_insn = instruction - 1; - if (is_mprofile_mcount_callsite(name, prev_insn)) + if (is_mprofile_ftrace_call(name)) return 1; /* @@ -506,7 +504,7 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) * "link" branches and they don't return, so they don't need the r2 * restore afterwards. */ - if (!instr_is_relative_link_branch(*prev_insn)) + if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) return 1; if (*instruction != PPC_INST_NOP) { @@ -636,14 +634,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_LIVEPATCH) { /* External: go via stub */ - value = stub_for_addr(sechdrs, value, me); + value = stub_for_addr(sechdrs, value, me, + strtab + sym->st_name); if (!value) return -ENOENT; if (!restore_r2(strtab + sym->st_name, (u32 *)location + 1, me)) return -ENOEXEC; - - squash_toc_save_inst(strtab + sym->st_name, value); } else value += local_entry_offset(sym); @@ -745,89 +742,51 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE - -#ifdef CONFIG_MPROFILE_KERNEL - -#define PACATOC offsetof(struct paca_struct, kernel_toc) - -/* - * For mprofile-kernel we use a special stub for ftrace_caller() because we - * can't rely on r2 containing this module's TOC when we enter the stub. - * - * That can happen if the function calling us didn't need to use the toc. In - * that case it won't have setup r2, and the r2 value will be either the - * kernel's toc, or possibly another modules toc. - * - * To deal with that this stub uses the kernel toc, which is always accessible - * via the paca (in r13). The target (ftrace_caller()) is responsible for - * saving and restoring the toc before returning. - */ -static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, - struct module *me, unsigned long addr) +int module_trampoline_target(struct module *mod, unsigned long addr, + unsigned long *target) { - struct ppc64_stub_entry *entry; - unsigned int i, num_stubs; - /* - * ld r12,PACATOC(r13) - * addis r12,r12,<high> - * addi r12,r12,<low> - * mtctr r12 - * bctr - */ - static u32 stub_insns[] = { - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, - PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, - }; - long reladdr; + struct ppc64_stub_entry *stub; + func_desc_t funcdata; + u32 magic; - num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*entry); + if (!within_module_core(addr, mod)) { + pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name); + return -EFAULT; + } - /* Find the next available stub entry */ - entry = (void *)sechdrs[me->arch.stubs_section].sh_addr; - for (i = 0; i < num_stubs && stub_func_addr(entry->funcdata); i++, entry++); + stub = (struct ppc64_stub_entry *)addr; - if (i >= num_stubs) { - pr_err("%s: Unable to find a free slot for ftrace stub.\n", me->name); - return 0; + if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; } - memcpy(entry->jump, stub_insns, sizeof(stub_insns)); - - /* Stub uses address relative to kernel toc (from the paca) */ - reladdr = addr - kernel_toc_addr(); - if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - pr_err("%s: Address of %ps out of range of kernel_toc.\n", - me->name, (void *)addr); - return 0; + if (magic != STUB_MAGIC) { + pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; } - entry->jump[1] |= PPC_HA(reladdr); - entry->jump[2] |= PPC_LO(reladdr); + if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { + pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */ - entry->funcdata = func_desc(addr); - entry->magic = STUB_MAGIC; + *target = stub_func_addr(funcdata); - return (unsigned long)entry; -} -#else -static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, - struct module *me, unsigned long addr) -{ - return stub_for_addr(sechdrs, addr, me); + return 0; } -#endif int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { - mod->arch.tramp = create_ftrace_stub(sechdrs, mod, - (unsigned long)ftrace_caller); + mod->arch.tramp = stub_for_addr(sechdrs, + (unsigned long)ftrace_caller, + mod, + "ftrace_caller"); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - mod->arch.tramp_regs = create_ftrace_stub(sechdrs, mod, - (unsigned long)ftrace_regs_caller); + mod->arch.tramp_regs = stub_for_addr(sechdrs, + (unsigned long)ftrace_regs_caller, + mod, + "ftrace_regs_caller"); if (!mod->arch.tramp_regs) return -ENOENT; #endif diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 0cd1c88bfc8b..532f22637783 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -852,8 +852,8 @@ loff_t __init nvram_create_partition(const char *name, int sig, BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16); /* Convert sizes from bytes to blocks */ - req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; - min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + req_size = ALIGN(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + min_size = ALIGN(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; /* If no minimum size specified, make it the same as the * requested size diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 024f7aad1952..69bfe96884e2 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -16,6 +16,7 @@ #include <asm/code-patching.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> +#include <asm/inst.h> #define TMPL_CALL_HDLR_IDX \ (optprobe_template_call_handler - optprobe_template_entry) @@ -99,8 +100,9 @@ static unsigned long can_optimize(struct kprobe *p) * Ensure that the instruction is not a conditional branch, * and that can be emulated. */ - if (!is_conditional_branch(*p->ainsn.insn) && - analyse_instr(&op, ®s, *p->ainsn.insn) == 1) { + if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) && + analyse_instr(&op, ®s, + ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) { emulate_update_regs(®s, &op); nip = regs.nip; } @@ -147,50 +149,57 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) { /* addis r4,0,(insn)@h */ - patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(4) | - ((val >> 16) & 0xffff)); + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ADDIS | ___PPC_RT(4) | + ((val >> 16) & 0xffff))); addr++; /* ori r4,r4,(insn)@l */ - patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(4) | - ___PPC_RS(4) | (val & 0xffff)); + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORI | ___PPC_RA(4) | + ___PPC_RS(4) | (val & 0xffff))); } /* * Generate instructions to load provided immediate 64-bit value - * to register 'r3' and patch these instructions at 'addr'. + * to register 'reg' and patch these instructions at 'addr'. */ -void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr) +void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { - /* lis r3,(op)@highest */ - patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(3) | - ((val >> 48) & 0xffff)); + /* lis reg,(op)@highest */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | + ((val >> 48) & 0xffff))); addr++; - /* ori r3,r3,(op)@higher */ - patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) | - ___PPC_RS(3) | ((val >> 32) & 0xffff)); + /* ori reg,reg,(op)@higher */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | + ___PPC_RS(reg) | ((val >> 32) & 0xffff))); addr++; - /* rldicr r3,r3,32,31 */ - patch_instruction(addr, PPC_INST_RLDICR | ___PPC_RA(3) | - ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31)); + /* rldicr reg,reg,32,31 */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | + ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); addr++; - /* oris r3,r3,(op)@h */ - patch_instruction(addr, PPC_INST_ORIS | ___PPC_RA(3) | - ___PPC_RS(3) | ((val >> 16) & 0xffff)); + /* oris reg,reg,(op)@h */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | + ___PPC_RS(reg) | ((val >> 16) & 0xffff))); addr++; - /* ori r3,r3,(op)@l */ - patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) | - ___PPC_RS(3) | (val & 0xffff)); + /* ori reg,reg,(op)@l */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | + ___PPC_RS(reg) | (val & 0xffff))); } int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { - kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step; - kprobe_opcode_t *op_callback_addr, *emulate_step_addr; + struct ppc_inst branch_op_callback, branch_emulate_step, temp; + kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff; long b_offset; unsigned long nip, size; int rc, i; @@ -230,7 +239,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int); pr_devel("Copying template to %p, size %lu\n", buff, size); for (i = 0; i < size; i++) { - rc = patch_instruction(buff + i, *(optprobe_template_entry + i)); + rc = patch_instruction((struct ppc_inst *)(buff + i), + ppc_inst(*(optprobe_template_entry + i))); if (rc < 0) goto error; } @@ -239,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * Fixup the template with instructions to: * 1. load the address of the actual probepoint */ - patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); + patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); /* * 2. branch to optimized_callback() and emulate_step() @@ -251,29 +261,34 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) goto error; } - branch_op_callback = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX, - (unsigned long)op_callback_addr, - BRANCH_SET_LINK); + rc = create_branch(&branch_op_callback, + (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), + (unsigned long)op_callback_addr, + BRANCH_SET_LINK); - branch_emulate_step = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX, - (unsigned long)emulate_step_addr, - BRANCH_SET_LINK); + rc |= create_branch(&branch_emulate_step, + (struct ppc_inst *)(buff + TMPL_EMULATE_IDX), + (unsigned long)emulate_step_addr, + BRANCH_SET_LINK); - if (!branch_op_callback || !branch_emulate_step) + if (rc) goto error; - patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback); - patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step); + patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), + branch_op_callback); + patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX), + branch_emulate_step); /* * 3. load instruction to be emulated into relevant register, and */ - patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); + temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); + patch_imm64_load_insns(ppc_inst_as_u64(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline */ - patch_branch(buff + TMPL_RET_IDX, (unsigned long)nip, 0); + patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0); flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX])); @@ -305,6 +320,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op) void arch_optimize_kprobes(struct list_head *oplist) { + struct ppc_inst instr; struct optimized_kprobe *op; struct optimized_kprobe *tmp; @@ -315,9 +331,10 @@ void arch_optimize_kprobes(struct list_head *oplist) */ memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE); - patch_instruction(op->kp.addr, - create_branch((unsigned int *)op->kp.addr, - (unsigned long)op->optinsn.insn, 0)); + create_branch(&instr, + (struct ppc_inst *)op->kp.addr, + (unsigned long)op->optinsn.insn, 0); + patch_instruction((struct ppc_inst *)op->kp.addr, instr); list_del_init(&op->list); } } diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index cf383520843f..ff8ba4d3824d 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -94,6 +94,9 @@ optprobe_template_insn: /* 2, Pass instruction to be emulated in r4 */ nop nop + nop + nop + nop .global optprobe_template_call_emulate optprobe_template_call_emulate: diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 3f91ccaa9c74..8d96169c597e 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -16,6 +16,7 @@ #include <asm/kexec.h> #include <asm/svm.h> #include <asm/ultravisor.h> +#include <asm/rtas.h> #include "setup.h" @@ -164,6 +165,30 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) #endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_PPC_PSERIES +/** + * new_rtas_args() - Allocates rtas args + * @cpu: CPU number + * @limit: Memory limit for this allocation + * + * Allocates a struct rtas_args and return it's pointer, + * if not in Hypervisor mode + * + * Return: Pointer to allocated rtas_args + * NULL if CPU in Hypervisor Mode + */ +static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) +{ + limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX); + + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + return NULL; + + return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES, + limit, cpu); +} +#endif /* CONFIG_PPC_PSERIES */ + /* The Paca is an array with one entry per processor. Each contains an * lppaca, which contains the information shared between the * hypervisor and Linux. @@ -202,6 +227,10 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int /* For now -- if we have threads this will be adjusted later */ new_paca->tcd_ptr = &new_paca->tcd; #endif + +#ifdef CONFIG_PPC_PSERIES + new_paca->rtas_args_reentrant = NULL; +#endif } /* Put the paca pointer into r13 and SPRG_PACA */ @@ -274,6 +303,9 @@ void __init allocate_paca(int cpu) #ifdef CONFIG_PPC_BOOK3S_64 paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); #endif +#ifdef CONFIG_PPC_PSERIES + paca->rtas_args_reentrant = new_rtas_args(cpu, limit); +#endif paca_struct_size += sizeof(struct paca_struct); } diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index bf83f76563a3..2fc12198ec07 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -57,8 +57,6 @@ void pcibios_release_device(struct pci_dev *dev) struct pci_controller *phb = pci_bus_to_host(dev->bus); struct pci_dn *pdn = pci_get_pdn(dev); - eeh_remove_device(dev); - if (phb->controller_ops.release_device) phb->controller_ops.release_device(dev); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index d9ac980c398c..9312e6eda7ff 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -100,7 +100,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus) pci_name(bus->self)); #ifdef CONFIG_PPC_BOOK3S_64 - __flush_hash_table_range(&init_mm, res->start + _IO_BASE, + __flush_hash_table_range(res->start + _IO_BASE, res->end + _IO_BASE + 1); #endif return 0; @@ -154,8 +154,8 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) unsigned long size_page; unsigned long io_virt_offset; - phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE); - size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE); + phys_page = ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE); + size_page = ALIGN(hose->pci_io_size, PAGE_SIZE); /* Make sure IO area address is clear */ hose->io_base_alloc = NULL; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9c21288f8645..048d64c4e115 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -629,15 +629,12 @@ void do_break (struct pt_regs *regs, unsigned long address, if (debugger_break_match(regs)) return; - /* Clear the breakpoint */ - hw_breakpoint_disable(); - /* Deliver the signal to userspace */ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address); } #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk); +static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk[HBP_NUM_MAX]); #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* @@ -711,21 +708,49 @@ void switch_booke_debug_regs(struct debug_reg *new_debug) EXPORT_SYMBOL_GPL(switch_booke_debug_regs); #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ #ifndef CONFIG_HAVE_HW_BREAKPOINT -static void set_breakpoint(struct arch_hw_breakpoint *brk) +static void set_breakpoint(int i, struct arch_hw_breakpoint *brk) { preempt_disable(); - __set_breakpoint(brk); + __set_breakpoint(i, brk); preempt_enable(); } static void set_debug_reg_defaults(struct thread_struct *thread) { - thread->hw_brk.address = 0; - thread->hw_brk.type = 0; - thread->hw_brk.len = 0; - thread->hw_brk.hw_len = 0; - if (ppc_breakpoint_available()) - set_breakpoint(&thread->hw_brk); + int i; + struct arch_hw_breakpoint null_brk = {0}; + + for (i = 0; i < nr_wp_slots(); i++) { + thread->hw_brk[i] = null_brk; + if (ppc_breakpoint_available()) + set_breakpoint(i, &thread->hw_brk[i]); + } +} + +static inline bool hw_brk_match(struct arch_hw_breakpoint *a, + struct arch_hw_breakpoint *b) +{ + if (a->address != b->address) + return false; + if (a->type != b->type) + return false; + if (a->len != b->len) + return false; + /* no need to check hw_len. it's calculated from address and len */ + return true; +} + +static void switch_hw_breakpoint(struct task_struct *new) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (likely(hw_brk_match(this_cpu_ptr(¤t_brk[i]), + &new->thread.hw_brk[i]))) + continue; + + __set_breakpoint(i, &new->thread.hw_brk[i]); + } } #endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -772,12 +797,12 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW | LCTRL1_CRWF_RW; unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN; - unsigned long start_addr = brk->address & ~HW_BREAKPOINT_ALIGN; - unsigned long end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN; + unsigned long start_addr = ALIGN_DOWN(brk->address, HW_BREAKPOINT_SIZE); + unsigned long end_addr = ALIGN(brk->address + brk->len, HW_BREAKPOINT_SIZE); if (start_addr == 0) lctrl2 |= LCTRL2_LW0LA_F; - else if (end_addr == ~0U) + else if (end_addr == 0) lctrl2 |= LCTRL2_LW0LA_E; else lctrl2 |= LCTRL2_LW0LA_EandF; @@ -793,20 +818,20 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO; mtspr(SPRN_CMPE, start_addr - 1); - mtspr(SPRN_CMPF, end_addr + 1); + mtspr(SPRN_CMPF, end_addr); mtspr(SPRN_LCTRL1, lctrl1); mtspr(SPRN_LCTRL2, lctrl2); return 0; } -void __set_breakpoint(struct arch_hw_breakpoint *brk) +void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) { - memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); + memcpy(this_cpu_ptr(¤t_brk[nr]), brk, sizeof(*brk)); if (dawr_enabled()) // Power8 or later - set_dawr(brk); + set_dawr(nr, brk); else if (IS_ENABLED(CONFIG_PPC_8xx)) set_breakpoint_8xx(brk); else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) @@ -829,19 +854,6 @@ bool ppc_breakpoint_available(void) } EXPORT_SYMBOL_GPL(ppc_breakpoint_available); -static inline bool hw_brk_match(struct arch_hw_breakpoint *a, - struct arch_hw_breakpoint *b) -{ - if (a->address != b->address) - return false; - if (a->type != b->type) - return false; - if (a->len != b->len) - return false; - /* no need to check hw_len. it's calculated from address and len */ - return true; -} - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline bool tm_enabled(struct task_struct *tsk) @@ -1174,8 +1186,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) - __set_breakpoint(&new->thread.hw_brk); + switch_hw_breakpoint(new); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -1228,7 +1239,8 @@ struct task_struct *__switch_to(struct task_struct *prev, * mappings, we must issue a cp_abort to clear any state and * prevent snooping, corruption or a covert channel. */ - if (current->thread.used_vas) + if (current->mm && + atomic_read(¤t->mm->context.vas_windows)) asm volatile(PPC_CP_ABORT); } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -1412,7 +1424,7 @@ void show_regs(struct pt_regs * regs) print_msr_bits(regs->msr); pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); - if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) + if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -1467,27 +1479,6 @@ void arch_setup_new_exec(void) } #endif -int set_thread_uses_vas(void) -{ -#ifdef CONFIG_PPC_BOOK3S_64 - if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -EINVAL; - - current->thread.used_vas = 1; - - /* - * Even a process that has no foreign real address mapping can use - * an unpaired COPY instruction (to no real effect). Issue CP_ABORT - * to clear any pending COPY and prevent a covert channel. - * - * __switch_to() will issue CP_ABORT on future context switches. - */ - asm volatile(PPC_CP_ABORT); - -#endif /* CONFIG_PPC_BOOK3S_64 */ - return 0; -} - #ifdef CONFIG_PPC64 /** * Assign a TIDR (thread ID) for task @t and set it in the thread @@ -1610,6 +1601,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, void (*f)(void); unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; struct thread_info *ti = task_thread_info(p); +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int i; +#endif klp_init_thread_info(p); @@ -1669,7 +1663,8 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, p->thread.ksp_limit = (unsigned long)end_of_stack(p); #endif #ifdef CONFIG_HAVE_HW_BREAKPOINT - p->thread.ptrace_bps[0] = NULL; + for (i = 0; i < nr_wp_slots(); i++) + p->thread.ptrace_bps[i] = NULL; #endif p->thread.fp_save_area = NULL; @@ -1740,7 +1735,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) * FULL_REGS(regs) return true. This is necessary to allow * ptrace to examine the thread immediately after exec. */ - regs->trap &= ~1UL; + SET_FULL_REGS(regs); #ifdef CONFIG_PPC32 regs->mq = 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6620f37abe73..6a3bac357e24 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -96,8 +96,8 @@ static inline int overlaps_initrd(unsigned long start, unsigned long size) if (!initrd_start) return 0; - return (start + size) > _ALIGN_DOWN(initrd_start, PAGE_SIZE) && - start <= _ALIGN_UP(initrd_end, PAGE_SIZE); + return (start + size) > ALIGN_DOWN(initrd_start, PAGE_SIZE) && + start <= ALIGN(initrd_end, PAGE_SIZE); #else return 0; #endif @@ -515,9 +515,14 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, size = 0x80000000ul - base; } + if (!validate_mem_limit(base, &size)) + continue; + DBG("Adding: %llx -> %llx\n", base, size); - if (validate_mem_limit(base, &size)) - memblock_add(base, size); + memblock_add(base, size); + + if (lmb->flags & DRCONF_MEM_HOTREMOVABLE) + memblock_mark_hotplug(base, size); } while (--rngs); } #endif /* CONFIG_PPC_PSERIES */ @@ -623,9 +628,9 @@ static void __init early_reserve_mem(void) #ifdef CONFIG_BLK_DEV_INITRD /* Then reserve the initrd, if any */ if (initrd_start && (initrd_end > initrd_start)) { - memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), - _ALIGN_UP(initrd_end, PAGE_SIZE) - - _ALIGN_DOWN(initrd_start, PAGE_SIZE)); + memblock_reserve(ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), + ALIGN(initrd_end, PAGE_SIZE) - + ALIGN_DOWN(initrd_start, PAGE_SIZE)); } #endif /* CONFIG_BLK_DEV_INITRD */ @@ -685,6 +690,23 @@ static void __init tm_init(void) static void tm_init(void) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#ifdef CONFIG_PPC64 +static void __init save_fscr_to_task(void) +{ + /* + * Ensure the init_task (pid 0, aka swapper) uses the value of FSCR we + * have configured via the device tree features or via __init_FSCR(). + * That value will then be propagated to pid 1 (init) and all future + * processes. + */ + if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + init_task.thread.fscr = mfspr(SPRN_FSCR); +} +#else +static inline void save_fscr_to_task(void) {}; +#endif + + void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -773,6 +795,8 @@ void __init early_init_devtree(void *params) BUG(); } + save_fscr_to_task(); + #if defined(CONFIG_SMP) && defined(CONFIG_PPC64) /* We'll later wait for secondaries to check in; there are * NCPUS-1 non-boot CPUs :-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 806be751c336..5f15b10eb007 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -920,7 +920,7 @@ struct option_vector6 { } __packed; struct ibm_arch_vec { - struct { u32 mask, val; } pvrs[12]; + struct { u32 mask, val; } pvrs[14]; u8 num_vectors; @@ -974,6 +974,14 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .val = cpu_to_be32(0x004e0000), }, { + .mask = cpu_to_be32(0xffff0000), /* POWER10 */ + .val = cpu_to_be32(0x00800000), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 3.1-compliant */ + .val = cpu_to_be32(0x0f000006), + }, + { .mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */ .val = cpu_to_be32(0x0f000005), }, @@ -1002,7 +1010,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .byte1 = 0, .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, - .arch_versions3 = OV1_PPC_3_00, + .arch_versions3 = OV1_PPC_3_00 | OV1_PPC_3_1, }, .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), @@ -1449,18 +1457,18 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) unsigned long addr = 0; if (align) - base = _ALIGN_UP(base, align); + base = ALIGN(base, align); prom_debug("%s(%lx, %lx)\n", __func__, size, align); if (ram_top == 0) prom_panic("alloc_up() called with mem not initialized\n"); if (align) - base = _ALIGN_UP(alloc_bottom, align); + base = ALIGN(alloc_bottom, align); else base = alloc_bottom; for(; (base + size) <= alloc_top; - base = _ALIGN_UP(base + 0x100000, align)) { + base = ALIGN(base + 0x100000, align)) { prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) @@ -1500,7 +1508,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, if (highmem) { /* Carve out storage for the TCE table. */ - addr = _ALIGN_DOWN(alloc_top_high - size, align); + addr = ALIGN_DOWN(alloc_top_high - size, align); if (addr <= alloc_bottom) return 0; /* Will we bump into the RMO ? If yes, check out that we @@ -1518,9 +1526,9 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, goto bail; } - base = _ALIGN_DOWN(alloc_top - size, align); + base = ALIGN_DOWN(alloc_top - size, align); for (; base > alloc_bottom; - base = _ALIGN_DOWN(base - 0x100000, align)) { + base = ALIGN_DOWN(base - 0x100000, align)) { prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) @@ -1586,8 +1594,8 @@ static void __init reserve_mem(u64 base, u64 size) * have our terminator with "size" set to 0 since we are * dumb and just copy this entire array to the boot params */ - base = _ALIGN_DOWN(base, PAGE_SIZE); - top = _ALIGN_UP(top, PAGE_SIZE); + base = ALIGN_DOWN(base, PAGE_SIZE); + top = ALIGN(top, PAGE_SIZE); size = top - base; if (cnt >= (MEM_RESERVE_MAP_SIZE - 1)) @@ -2426,7 +2434,7 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, { void *ret; - *mem_start = _ALIGN(*mem_start, align); + *mem_start = ALIGN(*mem_start, align); while ((*mem_start + needed) > *mem_end) { unsigned long room, chunk; @@ -2562,7 +2570,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, *lp++ = *p; } *lp = 0; - *mem_start = _ALIGN((unsigned long)lp + 1, 4); + *mem_start = ALIGN((unsigned long)lp + 1, 4); } /* get it again for debugging */ @@ -2608,7 +2616,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, /* push property content */ valp = make_room(mem_start, mem_end, l, 4); call_prom("getprop", 4, 1, node, pname, valp, l); - *mem_start = _ALIGN(*mem_start, 4); + *mem_start = ALIGN(*mem_start, 4); if (!prom_strcmp(pname, "phandle")) has_phandle = 1; @@ -2667,7 +2675,7 @@ static void __init flatten_device_tree(void) prom_panic ("couldn't get device tree root\n"); /* Build header and make room for mem rsv map */ - mem_start = _ALIGN(mem_start, 4); + mem_start = ALIGN(mem_start, 4); hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4); dt_header_start = (unsigned long)hdr; diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index f87e7c5c3bf3..697c7e4b5877 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -44,7 +44,7 @@ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo) dbginfo->version = 1; dbginfo->num_instruction_bps = 0; if (ppc_breakpoint_available()) - dbginfo->num_data_bps = 1; + dbginfo->num_data_bps = nr_wp_slots(); else dbginfo->num_data_bps = 0; dbginfo->num_condition_regs = 0; @@ -67,11 +67,16 @@ int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, /* We only support one DABR and no IABRS at the moment */ if (addr > 0) return -EINVAL; - dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | - (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + dabr_fake = ((child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR)); return put_user(dabr_fake, datalp); } +/* + * ptrace_set_debugreg() fakes DABR and DABR is only one. So even if + * internal hw supports more than one watchpoint, we support only one + * watchpoint with this interface. + */ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -137,7 +142,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l return ret; thread->ptrace_bps[0] = bp; - thread->hw_brk = hw_brk; + thread->hw_brk[0] = hw_brk; return 0; } @@ -159,12 +164,37 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l if (set_bp && (!ppc_breakpoint_available())) return -ENODEV; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ - task->thread.hw_brk = hw_brk; + task->thread.hw_brk[0] = hw_brk; return 0; } +#ifdef CONFIG_HAVE_HW_BREAKPOINT +static int find_empty_ptrace_bp(struct thread_struct *thread) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (!thread->ptrace_bps[i]) + return i; + } + return -1; +} +#endif + +static int find_empty_hw_brk(struct thread_struct *thread) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (!thread->hw_brk[i].address) + return i; + } + return -1; +} + long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) { + int i; #ifdef CONFIG_HAVE_HW_BREAKPOINT int len = 0; struct thread_struct *thread = &child->thread; @@ -186,7 +216,7 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf if ((unsigned long)bp_info->addr >= TASK_SIZE) return -EIO; - brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; + brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE); brk.type = HW_BRK_TYPE_TRANSLATE; brk.len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) @@ -200,8 +230,9 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf len = 1; else return -EINVAL; - bp = thread->ptrace_bps[0]; - if (bp) + + i = find_empty_ptrace_bp(thread); + if (i < 0) return -ENOSPC; /* Create a new breakpoint request if one doesn't exist already */ @@ -211,27 +242,28 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf arch_bp_generic_fields(brk.type, &attr.bp_type); bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child); - thread->ptrace_bps[0] = bp; + thread->ptrace_bps[i] = bp; if (IS_ERR(bp)) { - thread->ptrace_bps[0] = NULL; + thread->ptrace_bps[i] = NULL; return PTR_ERR(bp); } - return 1; + return i + 1; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) return -EINVAL; - if (child->thread.hw_brk.address) + i = find_empty_hw_brk(&child->thread); + if (i < 0) return -ENOSPC; if (!ppc_breakpoint_available()) return -ENODEV; - child->thread.hw_brk = brk; + child->thread.hw_brk[i] = brk; - return 1; + return i + 1; } long ppc_del_hwdebug(struct task_struct *child, long data) @@ -241,24 +273,24 @@ long ppc_del_hwdebug(struct task_struct *child, long data) struct thread_struct *thread = &child->thread; struct perf_event *bp; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ - if (data != 1) + if (data < 1 || data > nr_wp_slots()) return -EINVAL; #ifdef CONFIG_HAVE_HW_BREAKPOINT - bp = thread->ptrace_bps[0]; + bp = thread->ptrace_bps[data - 1]; if (bp) { unregister_hw_breakpoint(bp); - thread->ptrace_bps[0] = NULL; + thread->ptrace_bps[data - 1] = NULL; } else { ret = -ENOENT; } return ret; #else /* CONFIG_HAVE_HW_BREAKPOINT */ - if (child->thread.hw_brk.address == 0) + if (child->thread.hw_brk[data - 1].address == 0) return -ENOENT; - child->thread.hw_brk.address = 0; - child->thread.hw_brk.type = 0; + child->thread.hw_brk[data - 1].address = 0; + child->thread.hw_brk[data - 1].type = 0; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ return 0; diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index d75aff31f637..32d62c606681 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -43,7 +43,7 @@ static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap) { - task->thread.ckpt_regs.trap = trap & 0xfff0; + set_trap(&task->thread.ckpt_regs, trap); return 0; } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 15e3b79b6395..caeb5822a8f4 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -149,7 +149,7 @@ static int set_user_dscr(struct task_struct *task, unsigned long dscr) */ static int set_user_trap(struct task_struct *task, unsigned long trap) { - task->thread.regs->trap = trap & 0xfff0; + set_trap(task->thread.regs, trap); return 0; } diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c index 7976ddf29c0e..7589a9665ffb 100644 --- a/arch/powerpc/kernel/ptrace/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -259,8 +259,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, ret = put_user(child->thread.debug.dac1, (u32 __user *)data); #else dabr_fake = ( - (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | - (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + (child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR)); ret = put_user(dabr_fake, (u32 __user *)data); #endif break; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c5fa251b8950..a09eba03f180 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -41,6 +41,7 @@ #include <asm/time.h> #include <asm/mmu.h> #include <asm/topology.h> +#include <asm/paca.h> /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); @@ -1014,6 +1015,57 @@ out: free_cpumask_var(offline_mask); return atomic_read(&data.error); } + +/** + * rtas_call_reentrant() - Used for reentrant rtas calls + * @token: Token for desired reentrant RTAS call + * @nargs: Number of Input Parameters + * @nret: Number of Output Parameters + * @outputs: Array of outputs + * @...: Inputs for desired RTAS call + * + * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off", + * "ibm,get-xive" and "ibm,set-xive" are currently reentrant. + * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but + * PACA one instead. + * + * Return: -1 on error, + * First output value of RTAS call if (nret > 0), + * 0 otherwise, + */ +int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) +{ + va_list list; + struct rtas_args *args; + unsigned long flags; + int i, ret = 0; + + if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) + return -1; + + local_irq_save(flags); + preempt_disable(); + + /* We use the per-cpu (PACA) rtas args buffer */ + args = local_paca->rtas_args_reentrant; + + va_start(list, outputs); + va_rtas_call_unlocked(args, token, nargs, nret, list); + va_end(list); + + if (nret > 1 && outputs) + for (i = 0; i < nret - 1; ++i) + outputs[i] = be32_to_cpu(args->rets[i + 1]); + + if (nret > 0) + ret = be32_to_cpu(args->rets[0]); + + local_irq_restore(flags); + preempt_enable(); + + return ret; +} + #else /* CONFIG_PPC_PSERIES */ int rtas_ibm_suspend_me(u64 handle) { diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index bd70f5be1c27..d86701ce116b 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -7,6 +7,8 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/device.h> +#include <linux/nospec.h> +#include <linux/prctl.h> #include <linux/seq_buf.h> #include <asm/asm-prototypes.h> @@ -14,6 +16,7 @@ #include <asm/debugfs.h> #include <asm/security_features.h> #include <asm/setup.h> +#include <asm/inst.h> u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; @@ -353,6 +356,40 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute * return sprintf(buf, "Vulnerable\n"); } +static int ssb_prctl_get(struct task_struct *task) +{ + if (stf_enabled_flush_types == STF_BARRIER_NONE) + /* + * We don't have an explicit signal from firmware that we're + * vulnerable or not, we only have certain CPU revisions that + * are known to be vulnerable. + * + * We assume that if we're on another CPU, where the barrier is + * NONE, then we are not vulnerable. + */ + return PR_SPEC_NOT_AFFECTED; + else + /* + * If we do have a barrier type then we are vulnerable. The + * barrier is not a global or per-process mitigation, so the + * only value we can report here is PR_SPEC_ENABLE, which + * appears as "vulnerable" in /proc. + */ + return PR_SPEC_ENABLE; + + return -EINVAL; +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(task); + default: + return -ENODEV; + } +} + #ifdef CONFIG_DEBUG_FS static int stf_barrier_set(void *data, u64 val) { @@ -403,9 +440,11 @@ static void toggle_count_cache_flush(bool enable) enable = false; if (!enable) { - patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP); + patch_instruction_site(&patch__call_flush_count_cache, + ppc_inst(PPC_INST_NOP)); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP); + patch_instruction_site(&patch__call_kvm_flush_link_stack, + ppc_inst(PPC_INST_NOP)); #endif pr_info("link-stack-flush: software flush disabled.\n"); link_stack_flush_enabled = false; @@ -428,7 +467,8 @@ static void toggle_count_cache_flush(bool enable) // If we just need to flush the link stack, patch an early return if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { - patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR); + patch_instruction_site(&patch__flush_link_stack_return, + ppc_inst(PPC_INST_BLR)); no_count_cache_flush(); return; } @@ -439,7 +479,7 @@ static void toggle_count_cache_flush(bool enable) return; } - patch_instruction_site(&patch__flush_count_cache_return, PPC_INST_BLR); + patch_instruction_site(&patch__flush_count_cache_return, ppc_inst(PPC_INST_BLR)); count_cache_flush_type = COUNT_CACHE_FLUSH_HW; pr_info("count-cache-flush: hardware assisted flush sequence enabled\n"); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index f9c0d888ce8a..c376a0588039 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -306,10 +306,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) } } else { switch (PVR_VER(pvr)) { - case 0x0020: /* 403 family */ - maj = PVR_MAJ(pvr) + 1; - min = PVR_MIN(pvr); - break; case 0x1008: /* 740P/750P ?? */ maj = ((pvr >> 8) & 0xFF) - 1; min = pvr & 0xFF; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 305ca89d856f..d642e42eabb1 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -74,20 +74,20 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); */ notrace void __init machine_init(u64 dt_ptr) { - unsigned int *addr = (unsigned int *)patch_site_addr(&patch__memset_nocache); - unsigned long insn; + struct ppc_inst *addr = (struct ppc_inst *)patch_site_addr(&patch__memset_nocache); + struct ppc_inst insn; /* Configure static keys first, now that we're relocated. */ setup_feature_keys(); - early_ioremap_setup(); + early_ioremap_init(); /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - patch_instruction_site(&patch__memcpy_nocache, PPC_INST_NOP); + patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_INST_NOP)); - insn = create_cond_branch(addr, branch_target(addr), 0x820000); + create_cond_branch(&insn, addr, branch_target(addr), 0x820000); patch_instruction(addr, insn); /* replace b by bne cr0 */ /* Do some early initialization based on the flat device tree */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8105010b0e76..bb47555d48a2 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -711,7 +711,7 @@ void __init exc_lvl_early_init(void) */ void __init emergency_stack_init(void) { - u64 limit; + u64 limit, mce_limit; unsigned int i; /* @@ -728,7 +728,16 @@ void __init emergency_stack_init(void) * initialized in kernel/irq.c. These are initialized here in order * to have emergency stacks available as early as possible. */ - limit = min(ppc64_bolted_size(), ppc64_rma_size); + limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size); + + /* + * Machine check on pseries calls rtas, but can't use the static + * rtas_args due to a machine check hitting while the lock is held. + * rtas args have to be under 4GB, so the machine check stack is + * limited to 4GB so args can be put on stack. + */ + if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G) + mce_limit = SZ_4G; for_each_possible_cpu(i) { paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; @@ -738,7 +747,7 @@ void __init emergency_stack_init(void) paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; /* emergency stack for machine check exception handling. */ - paca_ptrs[i]->mc_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; + paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE; #endif } } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a264989626fd..b4143b6ff093 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -198,7 +198,10 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, int restart = 1; /* syscall ? */ - if (TRAP(regs) != 0x0C00) + if (!trap_is_syscall(regs)) + return; + + if (trap_norestart(regs)) return; /* error signalled ? */ @@ -258,19 +261,24 @@ static void do_signal(struct task_struct *tsk) if (ksig.sig <= 0) { /* No signal to deliver -- put the saved sigmask back */ restore_saved_sigmask(); - tsk->thread.regs->trap = 0; + set_trap_norestart(tsk->thread.regs); return; /* no signals delivered */ } -#ifndef CONFIG_PPC_ADV_DEBUG_REGS /* * Reenable the DABR before delivering the signal to * user space. The DABR will have been cleared if it * triggered inside the kernel. */ - if (tsk->thread.hw_brk.address && tsk->thread.hw_brk.type) - __set_breakpoint(&tsk->thread.hw_brk); -#endif + if (!IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) { + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (tsk->thread.hw_brk[i].address && tsk->thread.hw_brk[i].type) + __set_breakpoint(i, &tsk->thread.hw_brk[i]); + } + } + /* Re-enable the breakpoints for the signal stack */ thread_change_pc(tsk, tsk->thread.regs); @@ -285,7 +293,7 @@ static void do_signal(struct task_struct *tsk) ret = handle_rt_signal64(&ksig, oldset, tsk); } - tsk->thread.regs->trap = 0; + set_trap_norestart(tsk->thread.regs); signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP)); } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 4f96d29a22bf..ae3da7440b2f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -500,7 +500,7 @@ static long restore_user_regs(struct pt_regs *regs, if (!sig) save_r2 = (unsigned int)regs->gpr[2]; err = restore_general_regs(regs, sr); - regs->trap = 0; + set_trap_norestart(regs); err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); if (!sig) regs->gpr[2] = (unsigned long) save_r2; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index adfde59cf4ba..77061915897f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -350,8 +350,8 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]); err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]); err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]); - /* skip SOFTE */ - regs->trap = 0; + /* Don't allow userspace to set SOFTE */ + set_trap_norestart(regs); err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); @@ -472,10 +472,8 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, &sc->gp_regs[PT_XER]); err |= __get_user(tsk->thread.ckpt_regs.ccr, &sc->gp_regs[PT_CCR]); - - /* Don't allow userspace to set the trap value */ - regs->trap = 0; - + /* Don't allow userspace to set SOFTE */ + set_trap_norestart(regs); /* These regs are not checkpointed; they can go in 'regs'. */ err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6d2a3a3666f0..c820c95162ff 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1383,7 +1383,7 @@ void __init smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_SCHED_SMT if (has_big_cores) { - pr_info("Using small cores at SMT level\n"); + pr_info("Big cores detected but using small core scheduling\n"); power9_topology[0].mask = smallcore_smt_mask; powerpc_topology[0].mask = smallcore_smt_mask; } diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index cbdf86228eaa..f73f4d72fea4 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -395,6 +395,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) li r3,0 blr +_ASM_NOKPROBE_SYMBOL(swsusp_arch_resume) /* FIXME:This construct is actually not useful since we don't shut * down the instruction MMU, we could just flip back MSR-DR on. @@ -406,4 +407,5 @@ turn_on_mmu: sync isync rfi +_ASM_NOKPROBE_SYMBOL(turn_on_mmu) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 7b7c89cad901..79edba3ab312 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -102,6 +102,31 @@ notrace long system_call_exception(long r3, long r4, long r5, } /* + * local irqs must be disabled. Returns false if the caller must re-enable + * them, check for new work, and try again. + */ +static notrace inline bool prep_irq_for_enabled_exit(void) +{ + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + + /* This pattern matches prep_irq_for_idle */ + __hard_EE_RI_disable(); + if (unlikely(lazy_irq_pending_nocheck())) { + /* Took an interrupt, may have more exit work to do. */ + __hard_RI_enable(); + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + return false; + } + local_paca->irq_happened = 0; + irq_soft_mask_set(IRQS_ENABLED); + + return true; +} + +/* * This should be called after a syscall returns, with r3 the return value * from the syscall. If this function returns non-zero, the system call * exit assembly should additionally load all GPR registers and CTR and XER @@ -186,21 +211,10 @@ again: } } - /* This must be done with RI=1 because tracing may touch vmaps */ - trace_hardirqs_on(); - - /* This pattern matches prep_irq_for_idle */ - __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending_nocheck())) { - __hard_RI_enable(); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (unlikely(!prep_irq_for_enabled_exit())) { local_irq_enable(); - /* Took an interrupt, may have more exit work to do. */ goto again; } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; @@ -228,6 +242,10 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned BUG_ON(!FULL_REGS(regs)); BUG_ON(regs->softe != IRQS_ENABLED); + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * AMR can only have been unlocked if we interrupted the kernel. + */ kuap_check_amr(); local_irq_save(flags); @@ -264,19 +282,11 @@ again: } } - trace_hardirqs_on(); - __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending_nocheck())) { - __hard_RI_enable(); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (unlikely(!prep_irq_for_enabled_exit())) { local_irq_enable(); local_irq_disable(); - /* Took an interrupt, may have more exit work to do. */ goto again; } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); #ifdef CONFIG_PPC_BOOK3E if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) { @@ -307,13 +317,14 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long flags; unsigned long ret = 0; + unsigned long amr; if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); BUG_ON(regs->msr & MSR_PR); BUG_ON(!FULL_REGS(regs)); - kuap_check_amr(); + amr = kuap_get_and_check_amr(); if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); @@ -334,13 +345,7 @@ again: } } - trace_hardirqs_on(); - __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending_nocheck())) { - __hard_RI_enable(); - irq_soft_mask_set(IRQS_ALL_DISABLED); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (unlikely(!prep_irq_for_enabled_exit())) { /* * Can't local_irq_restore to replay if we were in * interrupt context. Must replay directly. @@ -354,8 +359,6 @@ again: /* Took an interrupt, may have more exit work to do. */ goto again; } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); } else { /* Returning to a kernel context with local irqs disabled. */ __hard_EE_RI_disable(); @@ -369,10 +372,11 @@ again: #endif /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. + * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr, + * which would cause Read-After-Write stalls. Hence, we take the AMR + * value from the check above. */ - kuap_restore_amr(regs); + kuap_restore_amr(regs, amr); return ret; } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 479c70680b76..571b3259697e 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -19,6 +19,7 @@ #include <asm/smp.h> #include <asm/pmc.h> #include <asm/firmware.h> +#include <asm/idle.h> #include <asm/svm.h> #include "cacheinfo.h" @@ -760,6 +761,74 @@ static void create_svm_file(void) } #endif /* CONFIG_PPC_SVM */ +#ifdef CONFIG_PPC_PSERIES +static void read_idle_purr(void *val) +{ + u64 *ret = val; + + *ret = read_this_idle_purr(); +} + +static ssize_t idle_purr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + u64 val; + + smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1); + return sprintf(buf, "%llx\n", val); +} +static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL); + +static void create_idle_purr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_create_file(s, &dev_attr_idle_purr); +} + +static void remove_idle_purr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_remove_file(s, &dev_attr_idle_purr); +} + +static void read_idle_spurr(void *val) +{ + u64 *ret = val; + + *ret = read_this_idle_spurr(); +} + +static ssize_t idle_spurr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + u64 val; + + smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1); + return sprintf(buf, "%llx\n", val); +} +static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL); + +static void create_idle_spurr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_create_file(s, &dev_attr_idle_spurr); +} + +static void remove_idle_spurr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_remove_file(s, &dev_attr_idle_spurr); +} + +#else /* CONFIG_PPC_PSERIES */ +#define create_idle_purr_file(s) +#define remove_idle_purr_file(s) +#define create_idle_spurr_file(s) +#define remove_idle_spurr_file(s) +#endif /* CONFIG_PPC_PSERIES */ + static int register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -823,10 +892,13 @@ static int register_cpu_online(unsigned int cpu) if (!firmware_has_feature(FW_FEATURE_LPAR)) add_write_permission_dev_attr(&dev_attr_purr); device_create_file(s, &dev_attr_purr); + create_idle_purr_file(s); } - if (cpu_has_feature(CPU_FTR_SPURR)) + if (cpu_has_feature(CPU_FTR_SPURR)) { device_create_file(s, &dev_attr_spurr); + create_idle_spurr_file(s); + } if (cpu_has_feature(CPU_FTR_DSCR)) device_create_file(s, &dev_attr_dscr); @@ -910,11 +982,15 @@ static int unregister_cpu_online(unsigned int cpu) device_remove_file(s, &dev_attr_mmcra); #endif /* CONFIG_PMU_SYSFS */ - if (cpu_has_feature(CPU_FTR_PURR)) + if (cpu_has_feature(CPU_FTR_PURR)) { device_remove_file(s, &dev_attr_purr); + remove_idle_purr_file(s); + } - if (cpu_has_feature(CPU_FTR_SPURR)) + if (cpu_has_feature(CPU_FTR_SPURR)) { device_remove_file(s, &dev_attr_spurr); + remove_idle_spurr_file(s); + } if (cpu_has_feature(CPU_FTR_DSCR)) device_remove_file(s, &dev_attr_dscr); diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 7ea0ca044b65..5e399628f51a 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -27,6 +27,7 @@ #include <asm/code-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> +#include <asm/inst.h> #ifdef CONFIG_DYNAMIC_FTRACE @@ -40,23 +41,23 @@ #define NUM_FTRACE_TRAMPS 8 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; -static unsigned int +static struct ppc_inst ftrace_call_replace(unsigned long ip, unsigned long addr, int link) { - unsigned int op; + struct ppc_inst op; addr = ppc_function_entry((void *)addr); /* if (link) set op to 'bl' else 'b' */ - op = create_branch((unsigned int *)ip, addr, link ? 1 : 0); + create_branch(&op, (struct ppc_inst *)ip, addr, link ? 1 : 0); return op; } static int -ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) +ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) { - unsigned int replaced; + struct ppc_inst replaced; /* * Note: @@ -67,18 +68,18 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) */ /* read the text we want to modify */ - if (probe_kernel_read(&replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (probe_kernel_read_inst(&replaced, (void *)ip)) return -EFAULT; /* Make sure it is what we expect it to be */ - if (replaced != old) { + if (!ppc_inst_equal(replaced, old)) { pr_err("%p: replaced (%#x) != old (%#x)", - (void *)ip, replaced, old); + (void *)ip, ppc_inst_val(replaced), ppc_inst_val(old)); return -EINVAL; } /* replace the text with the new text */ - if (patch_instruction((unsigned int *)ip, new)) + if (patch_instruction((struct ppc_inst *)ip, new)) return -EPERM; return 0; @@ -89,27 +90,28 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) */ static int test_24bit_addr(unsigned long ip, unsigned long addr) { + struct ppc_inst op; addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ - return create_branch((unsigned int *)ip, addr, 0); + return create_branch(&op, (struct ppc_inst *)ip, addr, 0) == 0; } -static int is_bl_op(unsigned int op) +static int is_bl_op(struct ppc_inst op) { - return (op & 0xfc000003) == 0x48000001; + return (ppc_inst_val(op) & 0xfc000003) == 0x48000001; } -static int is_b_op(unsigned int op) +static int is_b_op(struct ppc_inst op) { - return (op & 0xfc000003) == 0x48000000; + return (ppc_inst_val(op) & 0xfc000003) == 0x48000000; } -static unsigned long find_bl_target(unsigned long ip, unsigned int op) +static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op) { int offset; - offset = (op & 0x03fffffc); + offset = (ppc_inst_val(op) & 0x03fffffc); /* make it signed */ if (offset & 0x02000000) offset |= 0xfe000000; @@ -125,17 +127,17 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - unsigned int op, pop; + struct ppc_inst op, pop; /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); return -EINVAL; } @@ -160,16 +162,18 @@ __ftrace_make_nop(struct module *mod, #ifdef CONFIG_MPROFILE_KERNEL /* When using -mkernel_profile there is no load to jump over */ - pop = PPC_INST_NOP; + pop = ppc_inst(PPC_INST_NOP); - if (probe_kernel_read(&op, (void *)(ip - 4), 4)) { + if (probe_kernel_read_inst(&op, (void *)(ip - 4))) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); return -EFAULT; } /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ - if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) { - pr_err("Unexpected instruction %08x around bl _mcount\n", op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) && + !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { + pr_err("Unexpected instruction %08x around bl _mcount\n", + ppc_inst_val(op)); return -EINVAL; } #else @@ -187,24 +191,24 @@ __ftrace_make_nop(struct module *mod, * Use a b +8 to jump over the load. */ - pop = PPC_INST_BRANCH | 8; /* b +8 */ + pop = ppc_inst(PPC_INST_BRANCH | 8); /* b +8 */ /* * Check what is in the next instruction. We can see ld r2,40(r1), but * on first pass after boot we will see mflr r0. */ - if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) { + if (probe_kernel_read_inst(&op, (void *)(ip + 4))) { pr_err("Fetching op failed.\n"); return -EFAULT; } - if (op != PPC_INST_LD_TOC) { - pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { + pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, ppc_inst_val(op)); return -EINVAL; } #endif /* CONFIG_MPROFILE_KERNEL */ - if (patch_instruction((unsigned int *)ip, pop)) { + if (patch_instruction((struct ppc_inst *)ip, pop)) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -217,7 +221,7 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; + struct ppc_inst op; unsigned int jmp[4]; unsigned long ip = rec->ip; unsigned long tramp; @@ -227,7 +231,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); return -EINVAL; } @@ -274,9 +278,9 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } - op = PPC_INST_NOP; + op = ppc_inst(PPC_INST_NOP); - if (patch_instruction((unsigned int *)ip, op)) + if (patch_instruction((struct ppc_inst *)ip, op)) return -EPERM; return 0; @@ -287,6 +291,7 @@ __ftrace_make_nop(struct module *mod, static unsigned long find_ftrace_tramp(unsigned long ip) { int i; + struct ppc_inst instr; /* * We have the compiler generated long_branch tramps at the end @@ -295,7 +300,8 @@ static unsigned long find_ftrace_tramp(unsigned long ip) for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) if (!ftrace_tramps[i]) continue; - else if (create_branch((void *)ip, ftrace_tramps[i], 0)) + else if (create_branch(&instr, (void *)ip, + ftrace_tramps[i], 0) == 0) return ftrace_tramps[i]; return 0; @@ -322,8 +328,10 @@ static int add_ftrace_tramp(unsigned long tramp) */ static int setup_mcount_compiler_tramp(unsigned long tramp) { - int i, op; + int i; + struct ppc_inst op; unsigned long ptr; + struct ppc_inst instr; static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS]; /* Is this a known long jump tramp? */ @@ -341,7 +349,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) return -1; /* New trampoline -- read where this goes */ - if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)tramp)) { pr_debug("Fetching opcode failed.\n"); return -1; } @@ -366,13 +374,13 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) #else ptr = ppc_global_function_entry((void *)ftrace_caller); #endif - if (!create_branch((void *)tramp, ptr, 0)) { + if (create_branch(&instr, (void *)tramp, ptr, 0)) { pr_debug("%ps is not reachable from existing mcount tramp\n", (void *)ptr); return -1; } - if (patch_branch((unsigned int *)tramp, ptr, 0)) { + if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) { pr_debug("REL24 out of range!\n"); return -1; } @@ -388,17 +396,17 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) { unsigned long tramp, ip = rec->ip; - unsigned int op; + struct ppc_inst op; /* Read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); return -EINVAL; } @@ -416,7 +424,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) } } - if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) { + if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_INST_NOP))) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -428,7 +436,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - unsigned int old, new; + struct ppc_inst old, new; /* * If the calling address is more that 24 bits away, @@ -438,7 +446,7 @@ int ftrace_make_nop(struct module *mod, if (test_24bit_addr(ip, addr)) { /* within range */ old = ftrace_call_replace(ip, addr, 1); - new = PPC_INST_NOP; + new = ppc_inst(PPC_INST_NOP); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) return __ftrace_make_nop_kernel(rec, addr); @@ -481,7 +489,7 @@ int ftrace_make_nop(struct module *mod, */ #ifndef CONFIG_MPROFILE_KERNEL static int -expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) +expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) { /* * We expect to see: @@ -492,16 +500,17 @@ expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) * The load offset is different depending on the ABI. For simplicity * just mask it out when doing the compare. */ - if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000)) + if (!ppc_inst_equal(op0, ppc_inst(0x48000008)) || + (ppc_inst_val(op1) & 0xffff0000) != 0xe8410000) return 0; return 1; } #else static int -expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) +expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) { /* look for patched "NOP" on ppc64 with -mprofile-kernel */ - if (op0 != PPC_INST_NOP) + if (!ppc_inst_equal(op0, ppc_inst(PPC_INST_NOP))) return 0; return 1; } @@ -510,18 +519,22 @@ expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op[2]; + struct ppc_inst op[2]; + struct ppc_inst instr; void *ip = (void *)rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; /* read where this goes */ - if (probe_kernel_read(op, ip, sizeof(op))) + if (probe_kernel_read_inst(op, ip)) + return -EFAULT; + + if (probe_kernel_read_inst(op + 1, ip + 4)) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { pr_err("Unexpected call sequence at %p: %x %x\n", - ip, op[0], op[1]); + ip, ppc_inst_val(op[0]), ppc_inst_val(op[1])); return -EINVAL; } @@ -557,7 +570,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* Ensure branch is within 24 bits */ - if (!create_branch(ip, tramp, BRANCH_SET_LINK)) { + if (create_branch(&instr, ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } @@ -574,16 +587,17 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; + int err; + struct ppc_inst op; unsigned long ip = rec->ip; /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + if (probe_kernel_read_inst(&op, (void *)ip)) return -EFAULT; /* It should be pointing to a nop */ - if (op != PPC_INST_NOP) { - pr_err("Expected NOP but have %x\n", op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + pr_err("Expected NOP but have %x\n", ppc_inst_val(op)); return -EINVAL; } @@ -594,16 +608,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* create the branch to the trampoline */ - op = create_branch((unsigned int *)ip, - rec->arch.mod->arch.tramp, BRANCH_SET_LINK); - if (!op) { + err = create_branch(&op, (struct ppc_inst *)ip, + rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + if (err) { pr_err("REL24 out of range!\n"); return -EINVAL; } pr_devel("write to %lx\n", rec->ip); - if (patch_instruction((unsigned int *)ip, op)) + if (patch_instruction((struct ppc_inst *)ip, op)) return -EPERM; return 0; @@ -613,7 +627,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; + struct ppc_inst op; void *ip = (void *)rec->ip; unsigned long tramp, entry, ptr; @@ -634,13 +648,13 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } /* Make sure we have a nop */ - if (probe_kernel_read(&op, ip, sizeof(op))) { + if (probe_kernel_read_inst(&op, ip)) { pr_err("Unable to read ftrace location %p\n", ip); return -EFAULT; } - if (op != PPC_INST_NOP) { - pr_err("Unexpected call sequence at %p: %x\n", ip, op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + pr_err("Unexpected call sequence at %p: %x\n", ip, ppc_inst_val(op)); return -EINVAL; } @@ -661,7 +675,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - unsigned int old, new; + struct ppc_inst old, new; /* * If the calling address is more that 24 bits away, @@ -670,7 +684,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) */ if (test_24bit_addr(ip, addr)) { /* within range */ - old = PPC_INST_NOP; + old = ppc_inst(PPC_INST_NOP); new = ftrace_call_replace(ip, addr, 1); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) @@ -700,7 +714,7 @@ static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - unsigned int op; + struct ppc_inst op; unsigned long ip = rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; @@ -712,14 +726,14 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); return -EINVAL; } @@ -748,7 +762,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* The new target may be within range */ if (test_24bit_addr(ip, addr)) { /* within range */ - if (patch_branch((unsigned int *)ip, addr, BRANCH_SET_LINK)) { + if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -776,12 +790,12 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* Ensure branch is within 24 bits */ - if (!create_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) { + if (create_branch(&op, (struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } - if (patch_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) { + if (patch_branch((struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -794,7 +808,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { unsigned long ip = rec->ip; - unsigned int old, new; + struct ppc_inst old, new; /* * If the calling address is more that 24 bits away, @@ -834,10 +848,10 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); - unsigned int old, new; + struct ppc_inst old, new; int ret; - old = *(unsigned int *)&ftrace_call; + old = ppc_inst_read((struct ppc_inst *)&ftrace_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); @@ -845,7 +859,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) /* Also update the regs callback function */ if (!ret) { ip = (unsigned long)(&ftrace_regs_call); - old = *(unsigned int *)&ftrace_regs_call; + old = ppc_inst_read((struct ppc_inst *)&ftrace_regs_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); } @@ -919,7 +933,7 @@ int ftrace_enable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - unsigned int old, new; + struct ppc_inst old, new; old = ftrace_call_replace(ip, stub, 0); new = ftrace_call_replace(ip, addr, 0); @@ -932,7 +946,7 @@ int ftrace_disable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - unsigned int old, new; + struct ppc_inst old, new; old = ftrace_call_replace(ip, addr, 0); new = ftrace_call_replace(ip, stub, 0); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b44dd75de517..067e501f2202 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -442,6 +442,9 @@ void system_reset_exception(struct pt_regs *regs) { unsigned long hsrr0, hsrr1; bool saved_hsrrs = false; + u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); + + this_cpu_set_ftrace_enabled(0); nmi_enter(); @@ -504,11 +507,11 @@ out: #ifdef CONFIG_PPC_BOOK3S_64 BUG_ON(get_paca()->in_nmi == 0); if (get_paca()->in_nmi > 1) - nmi_panic(regs, "Unrecoverable nested System Reset"); + die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - nmi_panic(regs, "Unrecoverable System Reset"); + die("Unrecoverable System Reset", regs, SIGABRT); if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); @@ -517,6 +520,8 @@ out: nmi_exit(); + this_cpu_set_ftrace_enabled(ftrace_enabled); + /* What should we do here? We could issue a shutdown or hard reset. */ } @@ -576,6 +581,8 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_ILLEGAL (ESR_PIL | ESR_PUO) #define REASON_PRIVILEGED ESR_PPR #define REASON_TRAP ESR_PTR +#define REASON_PREFIXED 0 +#define REASON_BOUNDARY 0 /* single-step stuff */ #define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC) @@ -590,12 +597,16 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_ILLEGAL SRR1_PROGILL #define REASON_PRIVILEGED SRR1_PROGPRIV #define REASON_TRAP SRR1_PROGTRAP +#define REASON_PREFIXED SRR1_PREFIXED +#define REASON_BOUNDARY SRR1_BOUNDARY #define single_stepping(regs) ((regs)->msr & MSR_SE) #define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) #define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE) #endif +#define inst_length(reason) (((reason) & REASON_PREFIXED) ? 8 : 4) + #if defined(CONFIG_E500) int machine_check_e500mc(struct pt_regs *regs) { @@ -817,7 +828,19 @@ void machine_check_exception(struct pt_regs *regs) { int recover = 0; - nmi_enter(); + /* + * BOOK3S_64 does not call this handler as a non-maskable interrupt + * (it uses its own early real-mode handler to handle the MCE proper + * and then raises irq_work to call this handler when interrupts are + * enabled). + * + * This is silly. The BOOK3S_64 should just call a different function + * rather than expecting semantics to magically change. Something + * like 'non_nmi_machine_check_exception()', perhaps? + */ + const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64); + + if (nmi) nmi_enter(); __this_cpu_inc(irq_stat.mce_exceptions); @@ -843,18 +866,18 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - nmi_exit(); + if (nmi) nmi_exit(); die("Machine check", regs, SIGBUS); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - nmi_panic(regs, "Unrecoverable Machine check"); + die("Unrecoverable Machine check", regs, SIGBUS); return; bail: - nmi_exit(); + if (nmi) nmi_exit(); } void SMIException(struct pt_regs *regs) @@ -1583,11 +1606,20 @@ void alignment_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; + unsigned long reason; /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); + reason = get_reason(regs); + + if (reason & REASON_BOUNDARY) { + sig = SIGBUS; + code = BUS_ADRALN; + goto bad; + } + if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) goto bail; @@ -1596,7 +1628,8 @@ void alignment_exception(struct pt_regs *regs) fixed = fix_alignment(regs); if (fixed == 1) { - regs->nip += 4; /* skip over emulated instruction */ + /* skip over emulated instruction */ + regs->nip += inst_length(reason); emulate_single_step(regs); goto bail; } @@ -1609,6 +1642,7 @@ void alignment_exception(struct pt_regs *regs) sig = SIGBUS; code = BUS_ADRALN; } +bad: if (user_mode(regs)) _exception(sig, regs, code, regs->dar); else @@ -1710,6 +1744,7 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TAR_LG] = "TAR", [FSCR_MSGP_LG] = "MSGP", [FSCR_SCV_LG] = "SCV", + [FSCR_PREFIX_LG] = "PREFIX", }; char *facility = "unknown"; u64 value; diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 1cfef0e5fec5..d200e7df7167 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -14,6 +14,7 @@ #include <linux/kdebug.h> #include <asm/sstep.h> +#include <asm/inst.h> #define UPROBE_TRAP_NR UINT_MAX @@ -111,7 +112,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * support doesn't exist and have to fix-up the next instruction * to be executed. */ - regs->nip = utask->vaddr + MAX_UINSN_BYTES; + regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, &auprobe->insn); user_disable_single_step(current); return 0; @@ -173,7 +174,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) * emulate_step() returns 1 if the insn was successfully emulated. * For all other cases, we need to single-step in hardware. */ - ret = emulate_step(regs, auprobe->insn); + ret = emulate_step(regs, ppc_inst_read(&auprobe->insn)); if (ret > 0) return true; diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index 4acd3fb2b38e..ae632569446f 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -10,6 +10,7 @@ #include <asm/processor.h> #include <asm/switch_to.h> #include <linux/uaccess.h> +#include <asm/inst.h> /* Functions in vector.S */ extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); @@ -260,21 +261,24 @@ static unsigned int rfin(unsigned int x) int emulate_altivec(struct pt_regs *regs) { - unsigned int instr, i; + struct ppc_inst instr; + unsigned int i, word; unsigned int va, vb, vc, vd; vector128 *vrs; - if (get_user(instr, (unsigned int __user *) regs->nip)) + if (get_user_instr(instr, (void __user *)regs->nip)) return -EFAULT; - if ((instr >> 26) != 4) + + word = ppc_inst_val(instr); + if (ppc_inst_primary_opcode(instr) != 4) return -EINVAL; /* not an altivec instruction */ - vd = (instr >> 21) & 0x1f; - va = (instr >> 16) & 0x1f; - vb = (instr >> 11) & 0x1f; - vc = (instr >> 6) & 0x1f; + vd = (word >> 21) & 0x1f; + va = (word >> 16) & 0x1f; + vb = (word >> 11) & 0x1f; + vc = (word >> 6) & 0x1f; vrs = current->thread.vr_state.vr; - switch (instr & 0x3f) { + switch (word & 0x3f) { case 10: switch (vc) { case 0: /* vaddfp */ diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index d20c5e79e03c..efc5b52f95d2 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -89,6 +89,7 @@ _GLOBAL(load_up_altivec) REST_32VRS(0,r4,r6) /* restore registers and return */ blr +_ASM_NOKPROBE_SYMBOL(load_up_altivec) /* * save_altivec(tsk) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index a1706b63b82d..326e113d2e45 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -15,7 +15,6 @@ #include <asm/thread_info.h> #define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT) -#define ETEXT_ALIGN_SIZE (1 << CONFIG_ETEXT_SHIFT) ENTRY(_stext) @@ -117,7 +116,7 @@ SECTIONS } :text - . = ALIGN(ETEXT_ALIGN_SIZE); + . = ALIGN(PAGE_SIZE); _etext = .; PROVIDE32 (etext = .); |