From eeedcea69e927857d32aaf089725eddd2c79dd0a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 26 Jun 2015 08:09:29 +0100 Subject: ARM: 8395/1: l2c: Add support for the "arm,shared-override" property "CoreLink Level 2 Cache Controller L2C-310", p. 2-15, section 2.3.2 Shareable attribute" states: "The default behavior of the cache controller with respect to the shareable attribute is to transform Normal Memory Non-cacheable transactions into: - cacheable no allocate for reads - write through no write allocate for writes." Depending on the system architecture, this may cause memory corruption in the presence of bus mastering devices (e.g. OHCI). To avoid such corruption, the default behavior can be disabled by setting the Shared Override bit in the Auxiliary Control register. Currently the Shared Override bit can be set only using C code: - by calling l2x0_init() directly, which is deprecated, - by setting/clearing the bit in the machine_desc.l2c_aux_val/mask fields, but using values differing from 0/~0 is also deprecated. Hence add support for an "arm,shared-override" device tree property for the l2c device node. By specifying this property, affected systems can indicate that non-cacheable transactions must not be transformed. Then, it's up to the OS to decide. The current behavior is to set the "shared attribute override enable" bit, as there may exist kernel linear mappings and cacheable aliases for the DMA buffers, even if CMA is enabled. See also commit 1a8e41cd672f894b ("ARM: 6395/1: VExpress: Set bit 22 in the PL310 (cache controller) AuxCtlr register"): "Clearing bit 22 in the PL310 Auxiliary Control register (shared attribute override enable) has the side effect of transforming Normal Shared Non-cacheable reads into Cacheable no-allocate reads. Coherent DMA buffers in Linux always have a Cacheable alias via the kernel linear mapping and the processor can speculatively load cache lines into the PL310 controller. With bit 22 cleared, Non-cacheable reads would unexpectedly hit such cache lines leading to buffer corruption." Signed-off-by: Geert Uytterhoeven Acked-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 71b3d3309024..493692d838c6 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -1171,6 +1171,11 @@ static void __init l2c310_of_parse(const struct device_node *np, } } + if (of_property_read_bool(np, "arm,shared-override")) { + *aux_val |= L2C_AUX_CTRL_SHARED_OVERRIDE; + *aux_mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE; + } + prefetch = l2x0_saved_regs.prefetch_ctrl; ret = of_property_read_u32(np, "arm,double-linefill", &val); -- cgit v1.2.3 From 8ded1e1a92daa96307e4b84b707fee5993bc6047 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 7 Jul 2015 18:16:15 +0100 Subject: ARM: 8401/1: perf: Set affinity for PPI based PMUs For PPI based PMUs, we bail out early in of_pmu_irq_cfg() without setting the PMU's supported_cpus bitmap. This causes the smp_call_function_any() in armv7_probe_num_events() to fail. Set the bitmap to be all CPUs so that we properly probe PMUs that use PPIs. 
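As an aside, a minimal sketch of the failure mode being fixed, assuming the supported_cpus field introduced by "arm: perf: treat PMUs as CPU affine" and the generic smp_call_function_any() helper; the wrapper name below is invented for illustration and is not the exact kernel symbol:

#include <linux/smp.h>
#include <asm/pmu.h>

/*
 * The probe step runs a read-out function on one of the CPUs the PMU
 * is affine to, chosen from pmu->supported_cpus.  If a PPI-based PMU
 * leaves that mask empty there is no eligible CPU, the cross-call
 * returns an error, and probing fails; hence the cpumask_setall()
 * in the patch below.
 */
static int probe_on_affine_cpu(struct arm_pmu *pmu,
			       void (*probe_fn)(void *info))
{
	return smp_call_function_any(&pmu->supported_cpus, probe_fn, pmu, 1);
}
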
Fixes: cc88116da0d1 ("arm: perf: treat PMUs as CPU affine") Cc: Mark Rutland Signed-off-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 357f57ea83f4..f3ddd0ff2d8b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -795,8 +795,10 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); - if (irq >= 0 && irq_is_percpu(irq)) + if (irq >= 0 && irq_is_percpu(irq)) { + cpumask_setall(&pmu->supported_cpus); return 0; + } irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) -- cgit v1.2.3 From 57853e8906a04a86c32fb96d8421b923c6d64162 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Jul 2015 18:19:38 +0100 Subject: ARM: 8403/1: kbuild: don't use generic mcs_spinlock.h header We provide our own implementation of asm/mcs_spinlock.h, so there's no need to ask for the (empty) generic version. Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 83c50193626c..517ef6dd22b9 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -12,7 +12,6 @@ generic-y += irq_regs.h generic-y += kdebug.h generic-y += local.h generic-y += local64.h -generic-y += mcs_spinlock.h generic-y += msgbuf.h generic-y += param.h generic-y += parport.h -- cgit v1.2.3 From 96f0e00378d4a1fc1b79933ef84e1595015de808 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 3 Sep 2014 23:57:13 +0100 Subject: ARM: add basic support for on-demand backtrace of other CPUs As we now have generic infrastructure to support backtracing of other CPUs in the system on lockups, we can start to implement this for ARM. Initially, we add an IPI based implementation, as the GIC code needs modification to support the generation of FIQ IPIs, and not all ARM platforms have the ability to raise a FIQ in the non-secure world. This provides us with a "best efforts" implementation in the absence of FIQs. 
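For illustration only, this is roughly how the new hook gets exercised from generic code, assuming the trigger_all_cpu_backtrace() wrapper in <linux/nmi.h>; report_lockup() is an invented caller, not an existing kernel function:

#include <linux/nmi.h>
#include <linux/printk.h>

static void report_lockup(void)
{
	pr_emerg("lockup suspected, requesting backtraces from all CPUs\n");

	/*
	 * On ARM this resolves to the IPI-based implementation added
	 * below; it returns false on architectures that provide no
	 * arch_trigger_all_cpu_backtrace().
	 */
	if (!trigger_all_cpu_backtrace())
		pr_emerg("remote backtrace not supported on this architecture\n");
}
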
Signed-off-by: Russell King --- arch/arm/include/asm/irq.h | 5 +++++ arch/arm/kernel/smp.c | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index 53c15dec7af6..be1d07d59ee9 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -35,6 +35,11 @@ extern void (*handle_arch_irq)(struct pt_regs *); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); #endif +#ifdef CONFIG_SMP +extern void arch_trigger_all_cpu_backtrace(bool); +#define arch_trigger_all_cpu_backtrace(x) arch_trigger_all_cpu_backtrace(x) +#endif + #endif #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 90dfbedfbfb8..3a20c386fd33 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,7 @@ enum ipi_msg_type { IPI_CPU_STOP, IPI_IRQ_WORK, IPI_COMPLETION, + IPI_CPU_BACKTRACE = 15, }; static DECLARE_COMPLETION(cpu_running); @@ -630,6 +632,12 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; + case IPI_CPU_BACKTRACE: + irq_enter(); + nmi_cpu_backtrace(regs); + irq_exit(); + break; + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); @@ -724,3 +732,13 @@ static int __init register_cpufreq_notifier(void) core_initcall(register_cpufreq_notifier); #endif + +static void raise_nmi(cpumask_t *mask) +{ + smp_cross_call(mask, IPI_CPU_BACKTRACE); +} + +void arch_trigger_all_cpu_backtrace(bool include_self) +{ + nmi_trigger_all_cpu_backtrace(include_self, raise_nmi); +} -- cgit v1.2.3 From 0e6f07f29cfb8d79dbbdb12560a73f7121ba324e Mon Sep 17 00:00:00 2001 From: Alex Bennée Date: Tue, 7 Jul 2015 17:29:55 +0100 Subject: KVM: arm: guest debug, add stub KVM_SET_GUEST_DEBUG ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a stub function to support the KVM_SET_GUEST_DEBUG ioctl. Any unsupported flag will return -EINVAL. For now, only KVM_GUESTDBG_ENABLE is supported, although it won't have any effects. Signed-off-by: Alex Bennée . Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 2 +- arch/arm/kvm/arm.c | 7 ------- arch/arm/kvm/guest.c | 6 ++++++ arch/arm64/kvm/guest.c | 27 +++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 8 deletions(-) (limited to 'arch/arm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 9f746eab333d..19adfd385882 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2671,7 +2671,7 @@ handled. 
4.87 KVM_SET_GUEST_DEBUG Capability: KVM_CAP_SET_GUEST_DEBUG -Architectures: x86, s390, ppc +Architectures: x86, s390, ppc, arm64 Type: vcpu ioctl Parameters: struct kvm_guest_debug (in) Returns: 0 on success; -1 on error diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index bc738d2b8392..1b693cb2d5b2 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -301,13 +301,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_arm_set_running_vcpu(NULL); } -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) -{ - return -EINVAL; -} - - int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index d503fbb787d3..96e935bbc38c 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -290,3 +290,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, { return -EINVAL; } + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 9535bd555d1d..0ba86775235d 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -331,3 +331,30 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, { return -EINVAL; } + +#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE) + +/** + * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging + * @kvm: pointer to the KVM struct + * @kvm_guest_debug: the ioctl data buffer + * + * This sets up and enables the VM for guest debugging. Userspace + * passes in a control flag to enable different debug types and + * potentially other architecture specific information in the rest of + * the structure. + */ +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) + return -EINVAL; + + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + } else { + /* If not enabled clear all flags */ + vcpu->guest_debug = 0; + } + return 0; +} -- cgit v1.2.3 From 56c7f5e77f797fd0dcf2376ce1496f4238e6be33 Mon Sep 17 00:00:00 2001 From: Alex Bennée Date: Tue, 7 Jul 2015 17:29:56 +0100 Subject: KVM: arm: introduce kvm_arm_init/setup/clear_debug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a precursor for later patches which will need to do more to setup debug state before entering the hyp.S switch code. The existing functionality for setting mdcr_el2 has been moved out of hyp.S and now uses the value kept in vcpu->arch.mdcr_el2. As the assembler used to previously mask and preserve MDCR_EL2.HPMN I've had to add a mechanism to save the value of mdcr_el2 as a per-cpu variable during the initialisation code. The kernel never sets this number so we are assuming the bootcode has set up the correct value here. This also moves the conditional setting of the TDA bit from the hyp code into the C code which is currently used for the lazy debug register context switch code. 
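Stepping back to the KVM_SET_GUEST_DEBUG stub added two patches earlier: from userspace the ioctl is driven roughly as sketched below. This is a hedged example with minimal error handling; vcpu_fd is assumed to come from a prior KVM_CREATE_VCPU call, and at this point in the series only KVM_GUESTDBG_ENABLE is accepted (on arm64), with any other flag rejected with EINVAL:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int enable_guest_debug(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE;	/* no other flags supported yet */

	/* Returns 0 on success, -1 with errno set to EINVAL otherwise. */
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}
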
Signed-off-by: Alex Bennée Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 4 ++ arch/arm/kvm/arm.c | 6 +++ arch/arm64/include/asm/kvm_asm.h | 2 + arch/arm64/include/asm/kvm_host.h | 5 +++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/debug.c | 81 +++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp.S | 19 ++++----- 8 files changed, 108 insertions(+), 12 deletions(-) create mode 100644 arch/arm64/kvm/debug.c (limited to 'arch/arm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index e896d2c196e6..2b0bc8c57552 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -231,4 +231,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arm_init_debug(void) {} +static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} +static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 1b693cb2d5b2..77151b111d32 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -543,6 +543,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) continue; } + kvm_arm_setup_debug(vcpu); + /************************************************************** * Enter the guest */ @@ -557,6 +559,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * Back from guest *************************************************************/ + kvm_arm_clear_debug(vcpu); + /* * We may have taken a host interrupt in HYP mode (ie * while executing the guest). 
This interrupt is still @@ -914,6 +918,8 @@ static void cpu_init_hyp_mode(void *dummy) vector_ptr = (unsigned long)__kvm_hyp_vector; __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); + + kvm_arm_init_debug(); } static int hyp_init_cpu_notify(struct notifier_block *self, diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 3c5fe685a2d6..f5e40dae291a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -132,6 +132,8 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); +extern u32 __kvm_get_mdcr_el2(void); + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2709db2a7eac..c90c6a41c448 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -103,6 +103,7 @@ struct kvm_vcpu_arch { /* HYP configuration */ u64 hcr_el2; + u32 mdcr_el2; /* Exception Information */ struct kvm_vcpu_fault_info fault; @@ -227,4 +228,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +void kvm_arm_init_debug(void); +void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); +void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index c99701a34d7b..5c900d49b906 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -117,6 +117,7 @@ int main(void) DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); + DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index f90f4aa7f88d..1949fe5f5424 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -17,7 +17,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o -kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o +kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c new file mode 100644 index 000000000000..faf0e1fdba9e --- /dev/null +++ b/arch/arm64/kvm/debug.c @@ -0,0 +1,81 @@ +/* + * Debug and Guest Debug support + * + * Copyright (C) 2015 - Linaro Ltd + * Author: Alex Bennée + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include + +static DEFINE_PER_CPU(u32, mdcr_el2); + +/** + * kvm_arm_init_debug - grab what we need for debug + * + * Currently the sole task of this function is to retrieve the initial + * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has + * presumably been set-up by some knowledgeable bootcode. + * + * It is called once per-cpu during CPU hyp initialisation. + */ + +void kvm_arm_init_debug(void) +{ + __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2)); +} + + +/** + * kvm_arm_setup_debug - set up debug related stuff + * + * @vcpu: the vcpu pointer + * + * This is called before each entry into the hypervisor to setup any + * debug related registers. Currently this just ensures we will trap + * access to: + * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR) + * - Debug ROM Address (MDCR_EL2_TDRA) + * - OS related registers (MDCR_EL2_TDOSA) + * + * Additionally, KVM only traps guest accesses to the debug registers if + * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY + * flag on vcpu->arch.debug_flags). Since the guest must not interfere + * with the hardware state when debugging the guest, we must ensure that + * trapping is enabled whenever we are debugging the guest using the + * debug registers. + */ + +void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) +{ + bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY); + + vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK; + vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | + MDCR_EL2_TPMCR | + MDCR_EL2_TDRA | + MDCR_EL2_TDOSA); + + /* Trap on access to debug registers? */ + if (trap_debug) + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; + +} + +void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) +{ + /* Nothing to do yet */ +} diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 17a8fb14f428..b3176e6e51d1 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -770,17 +770,8 @@ mov x2, #(1 << 15) // Trap CP15 Cr=15 msr hstr_el2, x2 - mrs x2, mdcr_el2 - and x2, x2, #MDCR_EL2_HPMN_MASK - orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) - orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA) - - // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap - // if not dirty. - ldr x3, [x0, #VCPU_DEBUG_FLAGS] - tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f - orr x2, x2, #MDCR_EL2_TDA -1: + // Monitor Debug Config - see kvm_arm_setup_debug() + ldr x2, [x0, #VCPU_MDCR_EL2] msr mdcr_el2, x2 .endm @@ -1285,4 +1276,10 @@ ENTRY(__kvm_hyp_vector) ventry el1_error_invalid // Error 32-bit EL1 ENDPROC(__kvm_hyp_vector) + +ENTRY(__kvm_get_mdcr_el2) + mrs x0, mdcr_el2 + ret +ENDPROC(__kvm_get_mdcr_el2) + .popsection -- cgit v1.2.3 From 84e690bfbed1d1ecb45d8eccd4c7b6c8e878da1c Mon Sep 17 00:00:00 2001 From: Alex Bennée Date: Tue, 7 Jul 2015 17:30:00 +0100 Subject: KVM: arm64: introduce vcpu->arch.debug_ptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces a level of indirection for the debug registers. Instead of using the sys_regs[] directly we store registers in a structure in the vcpu. The new kvm_arm_reset_debug_ptr() sets the debug ptr to the guest context. Because we no longer give the sys_regs offset for the sys_reg_desc->reg field, but instead the index into a debug-specific struct we need to add a number of additional trap functions for each register. 
Also as the generic generic user-space access code no longer works we have introduced a new pair of function pointers to the sys_reg_desc structure to override the generic code when needed. Reviewed-by: Christoffer Dall Signed-off-by: Alex Bennée Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 1 + arch/arm/kvm/arm.c | 2 + arch/arm64/include/asm/kvm_asm.h | 24 ++-- arch/arm64/include/asm/kvm_host.h | 17 ++- arch/arm64/kernel/asm-offsets.c | 6 + arch/arm64/kvm/debug.c | 9 ++ arch/arm64/kvm/hyp.S | 24 ++-- arch/arm64/kvm/sys_regs.c | 274 +++++++++++++++++++++++++++++++++++--- arch/arm64/kvm/sys_regs.h | 6 + 9 files changed, 316 insertions(+), 47 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 2b0bc8c57552..dcba0fa5176e 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -234,5 +234,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arm_init_debug(void) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} +static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 77151b111d32..9ce5cf02ed17 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -278,6 +278,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) /* Set up the timer */ kvm_timer_vcpu_init(vcpu); + kvm_arm_reset_debug_ptr(vcpu); + return 0; } diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index f5e40dae291a..67fa0de3d483 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -46,24 +46,16 @@ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ #define PAR_EL1 21 /* Physical Address Register */ #define MDSCR_EL1 22 /* Monitor Debug System Control Register */ -#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ -#define DBGBCR15_EL1 38 -#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ -#define DBGBVR15_EL1 54 -#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ -#define DBGWCR15_EL1 70 -#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ -#define DBGWVR15_EL1 86 -#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ +#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */ /* 32bit specific registers. 
Keep them at the end of the range */ -#define DACR32_EL2 88 /* Domain Access Control Register */ -#define IFSR32_EL2 89 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ -#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 94 +#define DACR32_EL2 24 /* Domain Access Control Register */ +#define IFSR32_EL2 25 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ +#define TEECR32_EL1 28 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 29 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 30 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index cfb675466e86..9b99402b14df 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -108,11 +108,25 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Debug state */ + /* Guest debug state */ u64 debug_flags; + /* + * We maintain more than a single set of debug registers to support + * debugging the guest from the host and to maintain separate host and + * guest state during world switches. vcpu_debug_state are the debug + * registers of the vcpu as the guest sees them. host_debug_state are + * the host registers which are saved and restored during world switches. + * + * debug_ptr points to the set of debug registers that should be loaded + * onto the hardware when running the guest. 
+ */ + struct kvm_guest_debug_arch *debug_ptr; + struct kvm_guest_debug_arch vcpu_debug_state; + /* Pointer to host CPU context */ kvm_cpu_context_t *host_cpu_context; + struct kvm_guest_debug_arch host_debug_state; /* VGIC state */ struct vgic_cpu vgic_cpu; @@ -242,5 +256,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} void kvm_arm_init_debug(void); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); +void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 5c900d49b906..d88630899a24 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -116,10 +116,16 @@ int main(void) DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); + DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr)); + DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr)); + DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr)); + DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr)); + DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state)); DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index d439eb8f3239..e0947b77faaa 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -66,6 +66,15 @@ void kvm_arm_init_debug(void) __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2)); } +/** + * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state + */ + +void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state; +} + /** * kvm_arm_setup_debug - set up debug related stuff * diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 8264f5b4817c..d93c0a23630e 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -596,6 +596,7 @@ __restore_sysregs: /* Save debug state */ __save_debug: // x2: ptr to CPU context + // x3: ptr to debug reg struct // x4/x5/x6-22/x24-26: trashed mrs x26, id_aa64dfr0_el1 @@ -606,15 +607,15 @@ __save_debug: sub w25, w26, w25 // How many WPs to skip mov x5, x24 - add x4, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) + add x4, x3, #DEBUG_BCR save_debug dbgbcr - add x4, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) + add x4, x3, #DEBUG_BVR save_debug dbgbvr mov x5, x25 - add x4, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) + add x4, x3, #DEBUG_WCR save_debug dbgwcr - add x4, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) + add x4, x3, #DEBUG_WVR save_debug dbgwvr mrs x21, mdccint_el1 @@ -624,6 +625,7 @@ __save_debug: /* Restore debug state */ __restore_debug: // x2: ptr to CPU context + // x3: ptr to debug reg struct // x4/x5/x6-22/x24-26: trashed mrs x26, id_aa64dfr0_el1 @@ -634,15 +636,15 @@ __restore_debug: sub w25, w26, w25 // 
How many WPs to skip mov x5, x24 - add x4, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) + add x4, x3, #DEBUG_BCR restore_debug dbgbcr - add x4, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) + add x4, x3, #DEBUG_BVR restore_debug dbgbvr mov x5, x25 - add x4, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) + add x4, x3, #DEBUG_WCR restore_debug dbgwcr - add x4, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) + add x4, x3, #DEBUG_WVR restore_debug dbgwvr ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] @@ -682,6 +684,7 @@ ENTRY(__kvm_vcpu_run) bl __save_sysregs compute_debug_state 1f + add x3, x0, #VCPU_HOST_DEBUG_STATE bl __save_debug 1: activate_traps @@ -697,6 +700,8 @@ ENTRY(__kvm_vcpu_run) bl __restore_fpsimd skip_debug_state x3, 1f + ldr x3, [x0, #VCPU_DEBUG_PTR] + kern_hyp_va x3 bl __restore_debug 1: restore_guest_32bit_state @@ -717,6 +722,8 @@ __kvm_vcpu_return: bl __save_sysregs skip_debug_state x3, 1f + ldr x3, [x0, #VCPU_DEBUG_PTR] + kern_hyp_va x3 bl __save_debug 1: save_guest_32bit_state @@ -739,6 +746,7 @@ __kvm_vcpu_return: // already been saved. Note that we nuke the whole 64bit word. // If we ever add more flags, we'll have to be more careful... str xzr, [x0, #VCPU_DEBUG_FLAGS] + add x3, x0, #VCPU_HOST_DEBUG_STATE bl __restore_debug 1: restore_host_regs diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c370b4014799..158bae7c52cc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -211,6 +211,203 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, return true; } +/* + * reg_to_dbg/dbg_to_reg + * + * A 32 bit write to a debug register leave top bits alone + * A 32 bit read from a debug register only returns the bottom bits + * + * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the + * hyp.S code switches between host and guest values in future. 
+ */ +static inline void reg_to_dbg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + u64 *dbg_reg) +{ + u64 val = *vcpu_reg(vcpu, p->Rt); + + if (p->is_32bit) { + val &= 0xffffffffUL; + val |= ((*dbg_reg >> 32) << 32); + } + + *dbg_reg = val; + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; +} + +static inline void dbg_to_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + u64 *dbg_reg) +{ + u64 val = *dbg_reg; + + if (p->is_32bit) + val &= 0xffffffffUL; + + *vcpu_reg(vcpu, p->Rt) = val; +} + +static inline bool trap_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + return true; +} + +static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; +} + +static inline bool trap_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + return true; +} + +static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + + return 0; +} + +static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; +} + +static inline bool trap_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + return true; +} + +static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + 
vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; +} + +static inline bool trap_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + return true; +} + +static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; +} + static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 amair; @@ -240,16 +437,16 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ /* DBGBVRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ - trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ + trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \ /* DBGBCRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ - trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ + trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \ /* DBGWVRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ - trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \ + trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \ /* DBGWCRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ - trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 } + trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } /* * Architected system registers. @@ -516,28 +713,55 @@ static bool trap_debug32(struct kvm_vcpu *vcpu, return true; } -#define DBG_BCR_BVR_WCR_WVR(n) \ - /* DBGBVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \ - NULL, (cp14_DBGBVR0 + (n) * 2) }, \ - /* DBGBCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \ - NULL, (cp14_DBGBCR0 + (n) * 2) }, \ - /* DBGWVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \ - NULL, (cp14_DBGWVR0 + (n) * 2) }, \ - /* DBGWCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \ - NULL, (cp14_DBGWCR0 + (n) * 2) } - -#define DBGBXVR(n) \ - { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \ - NULL, cp14_DBGBXVR0 + n * 2 } +/* AArch32 debug register mappings + * + * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0] + * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32] + * + * All control registers and watchpoint value registers are mapped to + * the lower 32 bits of their AArch64 equivalents. We share the trap + * handlers with the above AArch64 code which checks what mode the + * system is in. 
+ */ + +static inline bool trap_xvr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (p->is_write) { + u64 val = *dbg_reg; + + val &= 0xffffffffUL; + val |= *vcpu_reg(vcpu, p->Rt) << 32; + *dbg_reg = val; + + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32; + } + + return true; +} + +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ + /* DBGWCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n } + +#define DBGBXVR(n) \ + { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n } /* * Trapped cp14 registers. We generally ignore most of the external * debug, on the principle that they don't really make sense to a - * guest. Revisit this one day, whould this principle change. + * guest. Revisit this one day, would this principle change. */ static const struct sys_reg_desc cp14_regs[] = { /* DBGIDR */ @@ -1303,6 +1527,9 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg if (!r) return get_invariant_sys_reg(reg->id, uaddr); + if (r->get_user) + return (r->get_user)(vcpu, r, reg, uaddr); + return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); } @@ -1321,6 +1548,9 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg if (!r) return set_invariant_sys_reg(reg->id, uaddr); + if (r->set_user) + return (r->set_user)(vcpu, r, reg, uaddr); + return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index d411e251412c..eaa324e4db4d 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -55,6 +55,12 @@ struct sys_reg_desc { /* Value (usually reset value) */ u64 val; + + /* Custom get/set_user functions, fallback to generic if NULL */ + int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr); + int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr); }; static inline void print_sys_reg_instr(const struct sys_reg_params *p) -- cgit v1.2.3 From f81309067ff2d84788316c513a415f6bb8c9171f Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 1 Jun 2015 23:44:46 +0100 Subject: ARM: move heavy barrier support out of line The existing memory barrier macro causes a significant amount of code to be inserted inline at every call site. 
For example, in gpio_set_irq_type(), we have this for mb(): c0344c08: f57ff04e dsb st c0344c0c: e59f8190 ldr r8, [pc, #400] ; c0344da4 c0344c10: e3590004 cmp r9, #4 c0344c14: e5983014 ldr r3, [r8, #20] c0344c18: 0a000054 beq c0344d70 c0344c1c: e3530000 cmp r3, #0 c0344c20: 0a000004 beq c0344c38 c0344c24: e50b2030 str r2, [fp, #-48] ; 0xffffffd0 c0344c28: e50bc034 str ip, [fp, #-52] ; 0xffffffcc c0344c2c: e12fff33 blx r3 c0344c30: e51bc034 ldr ip, [fp, #-52] ; 0xffffffcc c0344c34: e51b2030 ldr r2, [fp, #-48] ; 0xffffffd0 c0344c38: e5963004 ldr r3, [r6, #4] Moving the outer_cache_sync() call out of line reduces the impact of the barrier: c0344968: f57ff04e dsb st c034496c: e35a0004 cmp sl, #4 c0344970: e50b2030 str r2, [fp, #-48] ; 0xffffffd0 c0344974: 0a000044 beq c0344a8c c0344978: ebf363dd bl c001d8f4 c034497c: e5953004 ldr r3, [r5, #4] This should reduce the cache footprint of this code. Overall, this results in a reduction of around 20K in the kernel size: text data bss dec hex filename 10773970 667392 10369656 21811018 14ccf4a ../build/imx6/vmlinux-old 10754219 667392 10369656 21791267 14c8223 ../build/imx6/vmlinux-new Another advantage to this approach is that we can finally resolve the issue of SoCs which have their own memory barrier requirements within multiplatform kernels (such as OMAP.) Here, the bus interconnects need additional handling to ensure that writes become visible in the correct order (eg, between dma_map() operations, writes to DMA coherent memory, and MMIO accesses.) Acked-by: Tony Lindgren Acked-by: Richard Woodruff Signed-off-by: Russell King --- arch/arm/include/asm/barrier.h | 12 +++++++++--- arch/arm/include/asm/outercache.h | 17 ----------------- arch/arm/kernel/irq.c | 1 + arch/arm/mach-mmp/pm-pxa910.c | 1 + arch/arm/mach-prima2/pm.c | 1 + arch/arm/mach-ux500/cache-l2x0.c | 1 + arch/arm/mm/Kconfig | 4 ++++ arch/arm/mm/flush.c | 11 +++++++++++ 8 files changed, 28 insertions(+), 20 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 6c2327e1c732..fea99b0e2087 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -2,7 +2,6 @@ #define __ASM_BARRIER_H #ifndef __ASSEMBLY__ -#include #define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t"); @@ -37,12 +36,19 @@ #define dmb(x) __asm__ __volatile__ ("" : : : "memory") #endif +#ifdef CONFIG_ARM_HEAVY_MB +extern void arm_heavy_mb(void); +#define __arm_heavy_mb(x...) do { dsb(x); arm_heavy_mb(); } while (0) +#else +#define __arm_heavy_mb(x...) dsb(x) +#endif + #ifdef CONFIG_ARCH_HAS_BARRIERS #include #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) -#define mb() do { dsb(); outer_sync(); } while (0) +#define mb() __arm_heavy_mb() #define rmb() dsb() -#define wmb() do { dsb(st); outer_sync(); } while (0) +#define wmb() __arm_heavy_mb(st) #define dma_rmb() dmb(osh) #define dma_wmb() dmb(oshst) #else diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h index 563b92fc2f41..c2bf24f40177 100644 --- a/arch/arm/include/asm/outercache.h +++ b/arch/arm/include/asm/outercache.h @@ -129,21 +129,4 @@ static inline void outer_resume(void) { } #endif -#ifdef CONFIG_OUTER_CACHE_SYNC -/** - * outer_sync - perform a sync point for outer cache - * - * Ensure that all outer cache operations are complete and any store - * buffers are drained. 
- */ -static inline void outer_sync(void) -{ - if (outer_cache.sync) - outer_cache.sync(); -} -#else -static inline void outer_sync(void) -{ } -#endif - #endif /* __ASM_OUTERCACHE_H */ diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 350f188c92d2..b96c8ed1723a 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -39,6 +39,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/mach-mmp/pm-pxa910.c b/arch/arm/mach-mmp/pm-pxa910.c index 04c9daf9f8d7..7db5870d127f 100644 --- a/arch/arm/mach-mmp/pm-pxa910.c +++ b/arch/arm/mach-mmp/pm-pxa910.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c index d99d08eeb966..83e94c95e314 100644 --- a/arch/arm/mach-prima2/pm.c +++ b/arch/arm/mach-prima2/pm.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c index 7557bede7ae6..780bd13cd7e3 100644 --- a/arch/arm/mach-ux500/cache-l2x0.c +++ b/arch/arm/mach-ux500/cache-l2x0.c @@ -8,6 +8,7 @@ #include #include +#include #include #include "db8500-regs.h" diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 7c6b976ab8d3..df7537f12469 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -883,6 +883,7 @@ config OUTER_CACHE config OUTER_CACHE_SYNC bool + select ARM_HEAVY_MB help The outer cache has a outer_cache_fns.sync function pointer that can be used to drain the write buffer of the outer cache. @@ -1031,6 +1032,9 @@ config ARCH_HAS_BARRIERS This option allows the use of custom mandatory barriers included via the mach/barriers.h file. +config ARM_HEAVY_MB + bool + config ARCH_SUPPORTS_BIG_ENDIAN bool help diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 34b66af516ea..ce6c2960d5ac 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -21,6 +21,17 @@ #include "mm.h" +#ifdef CONFIG_ARM_HEAVY_MB +void arm_heavy_mb(void) +{ +#ifdef CONFIG_OUTER_CACHE_SYNC + if (outer_cache.sync) + outer_cache.sync(); +#endif +} +EXPORT_SYMBOL(arm_heavy_mb); +#endif + #ifdef CONFIG_CPU_CACHE_VIPT static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) -- cgit v1.2.3 From 4e1f8a6f1d978f033f1751e2887b3a69fab3f878 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 3 Jun 2015 13:10:16 +0100 Subject: ARM: add soc memory barrier extension Add an extension to the heavy barrier code to allow a SoC specific memory barrier function to be provided. This is needed for platforms where the interconnect has weak ordering, and thus needs assistance to ensure that memory writes are properly visible in the correct order to other parts of the system. Acked-by: Tony Lindgren Acked-by: Richard Woodruff Signed-off-by: Russell King --- arch/arm/include/asm/barrier.h | 1 + arch/arm/mm/flush.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index fea99b0e2087..3d8f1d3ad9a7 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -37,6 +37,7 @@ #endif #ifdef CONFIG_ARM_HEAVY_MB +extern void (*soc_mb)(void); extern void arm_heavy_mb(void); #define __arm_heavy_mb(x...) 
do { dsb(x); arm_heavy_mb(); } while (0) #else diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index ce6c2960d5ac..1ec8e7590fc6 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -22,12 +22,16 @@ #include "mm.h" #ifdef CONFIG_ARM_HEAVY_MB +void (*soc_mb)(void); + void arm_heavy_mb(void) { #ifdef CONFIG_OUTER_CACHE_SYNC if (outer_cache.sync) outer_cache.sync(); #endif + if (soc_mb) + soc_mb(); } EXPORT_SYMBOL(arm_heavy_mb); #endif -- cgit v1.2.3 From f746929ffdc8a83c0e6092343d4475f6485e13d3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 6 Jun 2015 00:13:40 +0100 Subject: Revert "ARM: OMAP4: remove dead kconfig option OMAP4_ERRATA_I688" This reverts commit 606da4826b89b044b51e3a84958b802204cfe4c7. We actually need this code for proper behaviour of OMAP4, and it needs fixing a different way other than just removing the code. Disabling code which is necessary in the hopes of persuing multiplatform kernels is a stupid approach. Acked-by: Tony Lindgren Acked-by: Richard Woodruff Signed-off-by: Russell King --- arch/arm/mach-omap2/Kconfig | 21 ++++++++++++ arch/arm/mach-omap2/common.c | 1 + arch/arm/mach-omap2/common.h | 3 ++ arch/arm/mach-omap2/io.c | 2 ++ arch/arm/mach-omap2/omap-secure.h | 7 ++++ arch/arm/mach-omap2/omap4-common.c | 69 ++++++++++++++++++++++++++++++++++++++ arch/arm/mach-omap2/sleep44xx.S | 2 ++ 7 files changed, 105 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index ecc04ff13e95..2128441430ad 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -240,6 +240,27 @@ config OMAP3_SDRC_AC_TIMING wish to say no. Selecting yes without understanding what is going on could result in system crashes; +config OMAP4_ERRATA_I688 + bool "OMAP4 errata: Async Bridge Corruption" + depends on (ARCH_OMAP4 || SOC_OMAP5) && !ARCH_MULTIPLATFORM + select ARCH_HAS_BARRIERS + help + If a data is stalled inside asynchronous bridge because of back + pressure, it may be accepted multiple times, creating pointer + misalignment that will corrupt next transfers on that data path + until next reset of the system (No recovery procedure once the + issue is hit, the path remains consistently broken). Async bridge + can be found on path between MPU to EMIF and MPU to L3 interconnect. + This situation can happen only when the idle is initiated by a + Master Request Disconnection (which is trigged by software when + executing WFI on CPU). + The work-around for this errata needs all the initiators connected + through async bridge must ensure that data path is properly drained + before issuing WFI. This condition will be met if one Strongly ordered + access is performed to the target right before executing the WFI. + In MPU case, L3 T2ASYNC FIFO and DDR T2ASYNC FIFO needs to be drained. + IO barrier ensure that there is no synchronisation loss on initiators + operating on both interconnect port simultaneously. 
endmenu endif diff --git a/arch/arm/mach-omap2/common.c b/arch/arm/mach-omap2/common.c index eae6a0e87c90..484cdadfb187 100644 --- a/arch/arm/mach-omap2/common.c +++ b/arch/arm/mach-omap2/common.c @@ -30,4 +30,5 @@ int __weak omap_secure_ram_reserve_memblock(void) void __init omap_reserve(void) { omap_secure_ram_reserve_memblock(); + omap_barrier_reserve_memblock(); } diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index cf3cf22ecd42..46e24581d624 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -200,6 +200,9 @@ void __init omap4_map_io(void); void __init omap5_map_io(void); void __init ti81xx_map_io(void); +/* omap_barriers_init() is OMAP4 only */ +void omap_barriers_init(void); + /** * omap_test_timeout - busy-loop, testing a condition * @cond: condition to test until it evaluates to true diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 820dde8b5b04..7743e3672f98 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -306,6 +306,7 @@ void __init am33xx_map_io(void) void __init omap4_map_io(void) { iotable_init(omap44xx_io_desc, ARRAY_SIZE(omap44xx_io_desc)); + omap_barriers_init(); } #endif @@ -313,6 +314,7 @@ void __init omap4_map_io(void) void __init omap5_map_io(void) { iotable_init(omap54xx_io_desc, ARRAY_SIZE(omap54xx_io_desc)); + omap_barriers_init(); } #endif /* diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index af2851fbcdf0..dec2b05d184b 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -70,6 +70,13 @@ extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs, extern u32 rx51_secure_update_aux_cr(u32 set_bits, u32 clear_bits); extern u32 rx51_secure_rng_call(u32 ptr, u32 count, u32 flag); +#ifdef CONFIG_OMAP4_ERRATA_I688 +extern int omap_barrier_reserve_memblock(void); +#else +static inline void omap_barrier_reserve_memblock(void) +{ } +#endif + #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER void set_cntfreq(void); #else diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 16350eefa66c..7bb116a6f86f 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -51,6 +51,75 @@ static void __iomem *twd_base; #define IRQ_LOCALTIMER 29 +#ifdef CONFIG_OMAP4_ERRATA_I688 +/* Used to implement memory barrier on DRAM path */ +#define OMAP4_DRAM_BARRIER_VA 0xfe600000 + +void __iomem *dram_sync, *sram_sync; + +static phys_addr_t paddr; +static u32 size; + +void omap_bus_sync(void) +{ + if (dram_sync && sram_sync) { + writel_relaxed(readl_relaxed(dram_sync), dram_sync); + writel_relaxed(readl_relaxed(sram_sync), sram_sync); + isb(); + } +} +EXPORT_SYMBOL(omap_bus_sync); + +static int __init omap4_sram_init(void) +{ + struct device_node *np; + struct gen_pool *sram_pool; + + np = of_find_compatible_node(NULL, NULL, "ti,omap4-mpu"); + if (!np) + pr_warn("%s:Unable to allocate sram needed to handle errata I688\n", + __func__); + sram_pool = of_get_named_gen_pool(np, "sram", 0); + if (!sram_pool) + pr_warn("%s:Unable to get sram pool needed to handle errata I688\n", + __func__); + else + sram_sync = (void *)gen_pool_alloc(sram_pool, PAGE_SIZE); + + return 0; +} +omap_arch_initcall(omap4_sram_init); + +/* Steal one page physical memory for barrier implementation */ +int __init omap_barrier_reserve_memblock(void) +{ + + size = ALIGN(PAGE_SIZE, SZ_1M); + paddr = arm_memblock_steal(size, SZ_1M); + + return 0; +} + +void __init omap_barriers_init(void) 
+{ + struct map_desc dram_io_desc[1]; + + dram_io_desc[0].virtual = OMAP4_DRAM_BARRIER_VA; + dram_io_desc[0].pfn = __phys_to_pfn(paddr); + dram_io_desc[0].length = size; + dram_io_desc[0].type = MT_MEMORY_RW_SO; + iotable_init(dram_io_desc, ARRAY_SIZE(dram_io_desc)); + dram_sync = (void __iomem *) dram_io_desc[0].virtual; + + pr_info("OMAP4: Map 0x%08llx to 0x%08lx for dram barrier\n", + (long long) paddr, dram_io_desc[0].virtual); + +} +#else +void __init omap_barriers_init(void) +{} +#endif + void gic_dist_disable(void) { if (gic_dist_base_addr) diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S index ad1bb9431e94..b84a0122d823 100644 --- a/arch/arm/mach-omap2/sleep44xx.S +++ b/arch/arm/mach-omap2/sleep44xx.S @@ -333,9 +333,11 @@ ENDPROC(omap4_cpu_resume) #endif /* defined(CONFIG_SMP) && defined(CONFIG_PM) */ +#ifndef CONFIG_OMAP4_ERRATA_I688 ENTRY(omap_bus_sync) ret lr ENDPROC(omap_bus_sync) +#endif ENTRY(omap_do_wfi) stmfd sp!, {lr} -- cgit v1.2.3 From 3fa609755c11fbe8770ede4d895ebb86fb7b9f1e Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 6 Jun 2015 00:38:08 +0100 Subject: ARM: omap2: restore OMAP4 barrier behaviour Restore the OMAP4 barrier behaviour using the new implementation which allows multiplatform systems to hook into the mb() and wmb() ARM implementations to perform any necessary additional barrier maintanence. Acked-by: Tony Lindgren Acked-by: Richard Woodruff Signed-off-by: Russell King --- arch/arm/mach-omap2/Kconfig | 28 +++------ arch/arm/mach-omap2/common.h | 12 +++- arch/arm/mach-omap2/include/mach/barriers.h | 33 ----------- arch/arm/mach-omap2/omap-secure.h | 7 --- arch/arm/mach-omap2/omap4-common.c | 92 ++++++++++++++++++++++------- arch/arm/mach-omap2/sleep44xx.S | 10 +--- 6 files changed, 91 insertions(+), 91 deletions(-) delete mode 100644 arch/arm/mach-omap2/include/mach/barriers.h (limited to 'arch/arm') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 2128441430ad..8427997e09c4 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -29,6 +29,7 @@ config ARCH_OMAP4 select HAVE_ARM_SCU if SMP select HAVE_ARM_TWD if SMP select OMAP_INTERCONNECT + select OMAP_INTERCONNECT_BARRIER select PL310_ERRATA_588369 if CACHE_L2X0 select PL310_ERRATA_727915 if CACHE_L2X0 select PM_OPP if PM @@ -46,6 +47,7 @@ config SOC_OMAP5 select HAVE_ARM_TWD if SMP select HAVE_ARM_ARCH_TIMER select ARM_ERRATA_798181 if SMP + select OMAP_INTERCONNECT_BARRIER config SOC_AM33XX bool "TI AM33XX" @@ -70,6 +72,7 @@ config SOC_DRA7XX select HAVE_ARM_ARCH_TIMER select IRQ_CROSSBAR select ARM_ERRATA_798181 if SMP + select OMAP_INTERCONNECT_BARRIER config ARCH_OMAP2PLUS bool @@ -91,6 +94,10 @@ config ARCH_OMAP2PLUS help Systems based on OMAP2, OMAP3, OMAP4 or OMAP5 +config OMAP_INTERCONNECT_BARRIER + bool + select ARM_HEAVY_MB + if ARCH_OMAP2PLUS @@ -240,27 +247,6 @@ config OMAP3_SDRC_AC_TIMING wish to say no. Selecting yes without understanding what is going on could result in system crashes; -config OMAP4_ERRATA_I688 - bool "OMAP4 errata: Async Bridge Corruption" - depends on (ARCH_OMAP4 || SOC_OMAP5) && !ARCH_MULTIPLATFORM - select ARCH_HAS_BARRIERS - help - If a data is stalled inside asynchronous bridge because of back - pressure, it may be accepted multiple times, creating pointer - misalignment that will corrupt next transfers on that data path - until next reset of the system (No recovery procedure once the - issue is hit, the path remains consistently broken). 
Async bridge - can be found on path between MPU to EMIF and MPU to L3 interconnect. - This situation can happen only when the idle is initiated by a - Master Request Disconnection (which is trigged by software when - executing WFI on CPU). - The work-around for this errata needs all the initiators connected - through async bridge must ensure that data path is properly drained - before issuing WFI. This condition will be met if one Strongly ordered - access is performed to the target right before executing the WFI. - In MPU case, L3 T2ASYNC FIFO and DDR T2ASYNC FIFO needs to be drained. - IO barrier ensure that there is no synchronisation loss on initiators - operating on both interconnect port simultaneously. endmenu endif diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 46e24581d624..82f88b4ec15f 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -189,6 +189,15 @@ static inline void omap44xx_restart(enum reboot_mode mode, const char *cmd) } #endif +#ifdef CONFIG_OMAP_INTERCONNECT_BARRIER +void omap_barrier_reserve_memblock(void); +void omap_barriers_init(void); +#else +static inline void omap_barrier_reserve_memblock(void) +{ +} +#endif + /* This gets called from mach-omap2/io.c, do not call this */ void __init omap2_set_globals_tap(u32 class, void __iomem *tap); @@ -200,9 +209,6 @@ void __init omap4_map_io(void); void __init omap5_map_io(void); void __init ti81xx_map_io(void); -/* omap_barriers_init() is OMAP4 only */ -void omap_barriers_init(void); - /** * omap_test_timeout - busy-loop, testing a condition * @cond: condition to test until it evaluates to true diff --git a/arch/arm/mach-omap2/include/mach/barriers.h b/arch/arm/mach-omap2/include/mach/barriers.h deleted file mode 100644 index 1c582a8592b9..000000000000 --- a/arch/arm/mach-omap2/include/mach/barriers.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * OMAP memory barrier header. - * - * Copyright (C) 2011 Texas Instruments, Inc. - * Santosh Shilimkar - * Richard Woodruff - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __MACH_BARRIERS_H -#define __MACH_BARRIERS_H - -#include - -extern void omap_bus_sync(void); - -#define rmb() dsb() -#define wmb() do { dsb(); outer_sync(); omap_bus_sync(); } while (0) -#define mb() wmb() - -#endif /* __MACH_BARRIERS_H */ diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index dec2b05d184b..af2851fbcdf0 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -70,13 +70,6 @@ extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs, extern u32 rx51_secure_update_aux_cr(u32 set_bits, u32 clear_bits); extern u32 rx51_secure_rng_call(u32 ptr, u32 count, u32 flag); -#ifdef CONFIG_OMAP4_ERRATA_I688 -extern int omap_barrier_reserve_memblock(void); -#else -static inline void omap_barrier_reserve_memblock(void) -{ } -#endif - #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER void set_cntfreq(void); #else diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 7bb116a6f86f..949696b6f17b 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -51,16 +51,73 @@ static void __iomem *twd_base; #define IRQ_LOCALTIMER 29 -#ifdef CONFIG_OMAP4_ERRATA_I688 +#ifdef CONFIG_OMAP_INTERCONNECT_BARRIER + /* Used to implement memory barrier on DRAM path */ #define OMAP4_DRAM_BARRIER_VA 0xfe600000 -void __iomem *dram_sync, *sram_sync; +static void __iomem *dram_sync, *sram_sync; +static phys_addr_t dram_sync_paddr; +static u32 dram_sync_size; + +/* + * The OMAP4 bus structure contains asynchrnous bridges which can buffer + * data writes from the MPU. These asynchronous bridges can be found on + * paths between the MPU to EMIF, and the MPU to L3 interconnects. + * + * We need to be careful about re-ordering which can happen as a result + * of different accesses being performed via different paths, and + * therefore different asynchronous bridges. + */ -static phys_addr_t paddr; -static u32 size; +/* + * OMAP4 interconnect barrier which is called for each mb() and wmb(). + * This is to ensure that normal paths to DRAM (normal memory, cacheable + * accesses) are properly synchronised with writes to DMA coherent memory + * (normal memory, uncacheable) and device writes. + * + * The mb() and wmb() barriers only operate only on the MPU->MA->EMIF + * path, as we need to ensure that data is visible to other system + * masters prior to writes to those system masters being seen. + * + * Note: the SRAM path is not synchronised via mb() and wmb(). + */ +static void omap4_mb(void) +{ + if (dram_sync) + writel_relaxed(0, dram_sync); +} -void omap_bus_sync(void) +/* + * OMAP4 Errata i688 - asynchronous bridge corruption when entering WFI. + * + * If a data is stalled inside asynchronous bridge because of back + * pressure, it may be accepted multiple times, creating pointer + * misalignment that will corrupt next transfers on that data path until + * next reset of the system. No recovery procedure once the issue is hit, + * the path remains consistently broken. + * + * Async bridges can be found on paths between MPU to EMIF and MPU to L3 + * interconnects. + * + * This situation can happen only when the idle is initiated by a Master + * Request Disconnection (which is trigged by software when executing WFI + * on the CPU). 
+ * + * The work-around for this errata needs all the initiators connected + * through an async bridge to ensure that data path is properly drained + * before issuing WFI. This condition will be met if one Strongly ordered + * access is performed to the target right before executing the WFI. + * + * In MPU case, L3 T2ASYNC FIFO and DDR T2ASYNC FIFO needs to be drained. + * IO barrier ensure that there is no synchronisation loss on initiators + * operating on both interconnect port simultaneously. + * + * This is a stronger version of the OMAP4 memory barrier below, and + * operates on both the MPU->MA->EMIF path but also the MPU->OCP path + * as well, and is necessary prior to executing a WFI. + */ +void omap_interconnect_sync(void) { if (dram_sync && sram_sync) { writel_relaxed(readl_relaxed(dram_sync), dram_sync); @@ -68,7 +125,6 @@ void omap_bus_sync(void) isb(); } } -EXPORT_SYMBOL(omap_bus_sync); static int __init omap4_sram_init(void) { @@ -79,7 +135,7 @@ static int __init omap4_sram_init(void) if (!np) pr_warn("%s:Unable to allocate sram needed to handle errata I688\n", __func__); - sram_pool = of_get_named_gen_pool(np, "sram", 0); + sram_pool = of_gen_pool_get(np, "sram", 0); if (!sram_pool) pr_warn("%s:Unable to get sram pool needed to handle errata I688\n", __func__); @@ -91,13 +147,10 @@ static int __init omap4_sram_init(void) omap_arch_initcall(omap4_sram_init); /* Steal one page physical memory for barrier implementation */ -int __init omap_barrier_reserve_memblock(void) +void __init omap_barrier_reserve_memblock(void) { - - size = ALIGN(PAGE_SIZE, SZ_1M); - paddr = arm_memblock_steal(size, SZ_1M); - - return 0; + dram_sync_size = ALIGN(PAGE_SIZE, SZ_1M); + dram_sync_paddr = arm_memblock_steal(dram_sync_size, SZ_1M); } void __init omap_barriers_init(void) @@ -105,19 +158,18 @@ void __init omap_barriers_init(void) struct map_desc dram_io_desc[1]; dram_io_desc[0].virtual = OMAP4_DRAM_BARRIER_VA; - dram_io_desc[0].pfn = __phys_to_pfn(paddr); - dram_io_desc[0].length = size; + dram_io_desc[0].pfn = __phys_to_pfn(dram_sync_paddr); + dram_io_desc[0].length = dram_sync_size; dram_io_desc[0].type = MT_MEMORY_RW_SO; iotable_init(dram_io_desc, ARRAY_SIZE(dram_io_desc)); dram_sync = (void __iomem *) dram_io_desc[0].virtual; - pr_info("OMAP4: Map 0x%08llx to 0x%08lx for dram barrier\n", - (long long) paddr, dram_io_desc[0].virtual); + pr_info("OMAP4: Map %pa to %p for dram barrier\n", + &dram_sync_paddr, dram_sync); + soc_mb = omap4_mb; } -#else -void __init omap_barriers_init(void) -{} + #endif void gic_dist_disable(void) diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S index b84a0122d823..9b09d85d811a 100644 --- a/arch/arm/mach-omap2/sleep44xx.S +++ b/arch/arm/mach-omap2/sleep44xx.S @@ -333,16 +333,12 @@ ENDPROC(omap4_cpu_resume) #endif /* defined(CONFIG_SMP) && defined(CONFIG_PM) */ -#ifndef CONFIG_OMAP4_ERRATA_I688 -ENTRY(omap_bus_sync) - ret lr -ENDPROC(omap_bus_sync) -#endif - ENTRY(omap_do_wfi) stmfd sp!, {lr} +#ifdef CONFIG_OMAP_INTERCONNECT_BARRIER /* Drain interconnect write buffers. 
*/ - bl omap_bus_sync + bl omap_interconnect_sync +#endif /* * Execute an ISB instruction to ensure that all of the -- cgit v1.2.3 From 0d3f2c92e004c67404fabea19728c1962b777bd6 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 15 Jul 2015 15:38:29 +0100 Subject: irqchip/gic: Remove redundant gic_set_irqchip_flags Now that the GIC chip implementation enables IRQCHIP_SKIP_SET_WAKE and IRQCHIP_MASK_ON_SUSPEND by default, the platforms requiring them need not override the irqchip flags as before. This patch removes all the users of gic_set_irqchip_flags and the function itself. Signed-off-by: Sudeep Holla Acked-by: Linus Walleij Cc: Marc Zyngier Cc: Simon Horman Cc: Jason Cooper Cc: Michal Simek Cc: Magnus Damm Cc: Gregory CLEMENT Cc: Geert Uytterhoeven Cc: Lorenzo Pieralisi Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1436971109-20189-2-git-send-email-sudeep.holla@arm.com Signed-off-by: Thomas Gleixner --- arch/arm/mach-shmobile/intc-sh73a0.c | 1 - arch/arm/mach-shmobile/setup-r8a7779.c | 1 - arch/arm/mach-ux500/cpu.c | 1 - arch/arm/mach-zynq/common.c | 1 - drivers/irqchip/irq-gic.c | 5 ----- include/linux/irqchip/arm-gic.h | 1 - 6 files changed, 10 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mach-shmobile/intc-sh73a0.c b/arch/arm/mach-shmobile/intc-sh73a0.c index fd63ae6532fc..151a71a41fe3 100644 --- a/arch/arm/mach-shmobile/intc-sh73a0.c +++ b/arch/arm/mach-shmobile/intc-sh73a0.c @@ -313,7 +313,6 @@ void __init sh73a0_init_irq(void) void __iomem *gic_cpu_base = IOMEM(0xf0000100); void __iomem *intevtsa = ioremap_nocache(0xffd20100, PAGE_SIZE); - gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE); gic_init(0, 29, gic_dist_base, gic_cpu_base); register_intc_controller(&intcs_desc); diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c index c03e562be12b..aea5cff9495d 100644 --- a/arch/arm/mach-shmobile/setup-r8a7779.c +++ b/arch/arm/mach-shmobile/setup-r8a7779.c @@ -719,7 +719,6 @@ void __init r8a7779_init_irq_dt(void) void __iomem *gic_dist_base = ioremap_nocache(0xf0001000, 0x1000); void __iomem *gic_cpu_base = ioremap_nocache(0xf0000100, 0x1000); #endif - gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE); #ifdef CONFIG_ARCH_SHMOBILE_LEGACY gic_init(0, 29, gic_dist_base, gic_cpu_base); diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index e31d3d61c998..6cb10c77afd8 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -56,7 +56,6 @@ void __init ux500_init_irq(void) struct device_node *np; struct resource r; - gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND); irqchip_init(); np = of_find_compatible_node(NULL, NULL, "stericsson,db8500-prcmu"); of_address_to_resource(np, 0, &r); diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index 616d5840fc2e..2ad1accfba35 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -186,7 +186,6 @@ static void __init zynq_map_io(void) static void __init zynq_irq_init(void) { - gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND); irqchip_init(); } diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 39ff8df8cf64..80fde37076c4 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -881,11 +881,6 @@ static const struct irq_domain_ops gic_irq_domain_ops = { .xlate = gic_irq_domain_xlate, }; -void gic_set_irqchip_flags(unsigned long flags) -{ - gic_chip.flags |= flags; -} - void __init gic_init_bases(unsigned int gic_nr, int 
irq_start, void __iomem *dist_base, void __iomem *cpu_base, u32 percpu_offset, struct device_node *node) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 9de976b4f9a7..61a2007eb49a 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -95,7 +95,6 @@ struct device_node; -void gic_set_irqchip_flags(unsigned long flags); void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, u32 offset, struct device_node *); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); -- cgit v1.2.3 From 125897908c718972351b589da89b7f990892d4df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Apr 2014 20:04:39 +0200 Subject: arm: Provide atomic_{or,xor,and} Implement atomic logic ops -- atomic_{or,xor,and}. These will replace the atomic_{set,clear}_mask functions that are available on some archs. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner --- arch/arm/include/asm/atomic.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index e22c11970b7b..ff214bac9cb4 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -194,6 +194,14 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) +#define CONFIG_ARCH_HAS_ATOMIC_OR +#define atomic_andnot atomic_andnot + +ATOMIC_OP(and, &=, and) +ATOMIC_OP(andnot, &= ~, bic) +ATOMIC_OP(or, |=, orr) +ATOMIC_OP(xor, ^=, eor) + #undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -321,6 +329,13 @@ static inline long long atomic64_##op##_return(long long i, atomic64_t *v) \ ATOMIC64_OPS(add, adds, adc) ATOMIC64_OPS(sub, subs, sbc) +#define atomic64_andnot atomic64_andnot + +ATOMIC64_OP(and, and, and) +ATOMIC64_OP(andnot, bic, bic) +ATOMIC64_OP(or, orr, orr) +ATOMIC64_OP(xor, eor, eor) + #undef ATOMIC64_OPS #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -- cgit v1.2.3 From e6942b7de2dfe44ebde9bae57dadece5abca9de8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Apr 2014 19:32:50 +0200 Subject: atomic: Provide atomic_{or,xor,and} Implement atomic logic ops -- atomic_{or,xor,and}. These will replace the atomic_{set,clear}_mask functions that are available on some archs. 
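As a caller-facing illustration (not part of either patch), here is a minimal sketch of the migration these atomic changes enable; the flags word and the MY_FLAG_* bits are hypothetical, and the calls shown in the comment are the atomic_{set,clear}_mask() helpers this series deprecates:

#include <linux/atomic.h>

#define MY_FLAG_RUNNING	0x01	/* hypothetical driver flag bits */
#define MY_FLAG_ERROR	0x02

static atomic_t my_flags = ATOMIC_INIT(0);

static void example_update_flags(void)
{
	/* old style, deprecated by this series:
	 *	atomic_set_mask(MY_FLAG_RUNNING, &my_flags);
	 *	atomic_clear_mask(MY_FLAG_ERROR, &my_flags);
	 */

	atomic_or(MY_FLAG_RUNNING, &my_flags);	/* *v |= mask, atomically */
	atomic_and(~MY_FLAG_ERROR, &my_flags);	/* *v &= ~mask, atomically */
	atomic_xor(MY_FLAG_RUNNING, &my_flags);	/* *v ^= mask, atomically */
}
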
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner --- arch/alpha/include/asm/atomic.h | 1 - arch/arc/include/asm/atomic.h | 1 - arch/arm/include/asm/atomic.h | 1 - arch/arm64/include/asm/atomic.h | 1 - arch/avr32/include/asm/atomic.h | 2 -- arch/blackfin/include/asm/atomic.h | 2 -- arch/frv/include/asm/atomic.h | 2 -- arch/h8300/include/asm/atomic.h | 2 -- arch/hexagon/include/asm/atomic.h | 2 -- arch/ia64/include/asm/atomic.h | 2 -- arch/m32r/include/asm/atomic.h | 2 -- arch/m68k/include/asm/atomic.h | 2 -- arch/metag/include/asm/atomic_lnkget.h | 2 -- arch/mips/include/asm/atomic.h | 2 -- arch/mn10300/include/asm/atomic.h | 2 -- arch/parisc/include/asm/atomic.h | 2 -- arch/powerpc/include/asm/atomic.h | 2 -- arch/s390/include/asm/atomic.h | 2 -- arch/sh/include/asm/atomic-grb.h | 2 -- arch/sparc/include/asm/atomic_32.h | 2 -- arch/sparc/include/asm/atomic_64.h | 2 -- arch/tile/include/asm/atomic_32.h | 2 -- arch/tile/include/asm/atomic_64.h | 2 -- arch/x86/include/asm/atomic.h | 2 -- arch/xtensa/include/asm/atomic.h | 2 -- include/asm-generic/atomic.h | 21 ++++++++++++--------- include/asm-generic/atomic64.h | 4 ++++ include/linux/atomic.h | 13 ------------- lib/atomic64.c | 3 +++ 29 files changed, 19 insertions(+), 68 deletions(-) (limited to 'arch/arm') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 0eff853398d2..e8c956098424 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -110,7 +110,6 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index e90b701fc6a8..2a847821dee1 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -144,7 +144,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR #define atomic_andnot atomic_andnot ATOMIC_OP(and, &=, and) diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index ff214bac9cb4..82b75a7cb762 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -194,7 +194,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR #define atomic_andnot atomic_andnot ATOMIC_OP(and, &=, and) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 2876173397b2..866a71fca9a3 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -85,7 +85,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add, add) ATOMIC_OPS(sub, sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR #define atomic_andnot atomic_andnot ATOMIC_OP(and, and) diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index 115d3005e4bc..97c9bdf83409 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -51,8 +51,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ (void)__atomic_##op##_return(i, v); \ } -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, and) ATOMIC_OP(or, or) ATOMIC_OP(xor, eor) diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index eafa55b81a7b..2d6a7a3823c3 100644 --- a/arch/blackfin/include/asm/atomic.h +++ 
b/arch/blackfin/include/asm/atomic.h @@ -28,8 +28,6 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value); #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i) #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i)) -#define CONFIG_ARCH_HAS_ATOMIC_OR - #define atomic_or(i, v) (void)__raw_atomic_or_asm(&(v)->counter, i) #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i) #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i) diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 74d22454d7c6..fc48bea26b40 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -192,8 +192,6 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ (void)__atomic64_fetch_##op(i, &v->counter); \ } -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(or) ATOMIC_OP(and) ATOMIC_OP(xor) diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index f181f820be33..c4d061f09c44 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -41,8 +41,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, &=) ATOMIC_OP(or, |=) ATOMIC_OP(xor, ^=) diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 4efe2c7c0dd8..811d61f6422d 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -132,8 +132,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 0809ef5d6b9a..be4beeb77d57 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -69,8 +69,6 @@ ATOMIC_OP(sub, -) : ia64_atomic_sub(__ia64_asr_i, v); \ }) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, &) ATOMIC_OP(or, |) ATOMIC_OP(xor, ^) diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index 7245463c1e98..b2a13fbd5be0 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -94,8 +94,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index c30e43ea49a3..93ebd96aa494 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -77,8 +77,6 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, &=, and) ATOMIC_OP(or, |=, or) ATOMIC_OP(xor, ^=, eor) diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index 930c12cb8d37..0642606de901 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -74,8 +74,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 0430ba6ab762..4c42fd9af777 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -137,8 +137,6 @@ static __inline__ int 
atomic_##op##_return(int i, atomic_t * v) \ ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, &=, and) ATOMIC_OP(or, |=, or) ATOMIC_OP(xor, ^=, xor) diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index 03eea8158cf9..f5a63f0bda46 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -89,8 +89,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index be2c50ddebd6..2536965d00ea 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -126,8 +126,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, &=) ATOMIC_OP(or, |=) ATOMIC_OP(xor, ^=) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 6ca89e2aca15..55f106ed12bf 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -67,8 +67,6 @@ static __inline__ int atomic_##op##_return(int a, atomic_t *v) \ ATOMIC_OPS(add, add) ATOMIC_OPS(sub, subf) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, and) ATOMIC_OP(or, or) ATOMIC_OP(xor, xor) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index b3859d8e001f..d761aeff72da 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -282,8 +282,6 @@ static inline void atomic64_##op(long i, atomic64_t *v) \ __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \ } -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC64_OP(and, AND) ATOMIC64_OP(or, OR) ATOMIC64_OP(xor, XOR) diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index 4b03830d48c7..b94df40e5f2d 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -48,8 +48,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index e19d8880b146..7dcbebbcaec6 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -17,8 +17,6 @@ #include #include -#define CONFIG_ARCH_HAS_ATOMIC_OR - #define ATOMIC_INIT(i) { (i) } int atomic_add_return(int, atomic_t *); diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index d6af27c93450..917084ace49d 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -33,8 +33,6 @@ long atomic64_##op##_return(long, atomic64_t *); ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index 94237922f0dd..d320ce253d86 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -41,8 +41,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ _atomic_##op((unsigned long *)&v->counter, i); \ } -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index d07d9fc6e2a1..096a56d6ead4 100644 --- 
a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -58,8 +58,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) return oldval; } -#define CONFIG_ARCH_HAS_ATOMIC_OR - static inline void atomic_and(int i, atomic_t *v) { __insn_fetchand4((void *)&v->counter, i); diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index f3a3ec040694..b3493023efda 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -191,8 +191,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ : "memory"); \ } -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 4dd2450300a6..31371f43c23b 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -145,8 +145,6 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 92947e0a532a..a41b0b8f7404 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -102,24 +102,27 @@ ATOMIC_OP_RETURN(sub, -) ATOMIC_OP(and, &) #endif -#ifndef atomic_clear_mask -#define atomic_clear_mask(i, v) atomic_and(~(i), (v)) -#endif - #ifndef atomic_or -#ifndef CONFIG_ARCH_HAS_ATOMIC_OR -#define CONFIG_ARCH_HAS_ATOMIC_OR -#endif ATOMIC_OP(or, |) #endif -#ifndef atomic_set_mask -#define atomic_set_mask(i, v) atomic_or((i), (v)) +#ifndef atomic_xor +ATOMIC_OP(xor, ^) #endif #undef ATOMIC_OP_RETURN #undef ATOMIC_OP +static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) +{ + atomic_and(~mask, v); +} + +static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) +{ + atomic_or(mask, v); +} + /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. 
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 30ad9c86cebb..d48e78ccad3d 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -32,6 +32,10 @@ extern long long atomic64_##op##_return(long long a, atomic64_t *v); ATOMIC64_OPS(add) ATOMIC64_OPS(sub) +ATOMIC64_OP(and) +ATOMIC64_OP(or) +ATOMIC64_OP(xor) + #undef ATOMIC64_OPS #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 5b08a8540ecf..7d6279012a1f 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -111,19 +111,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif -#ifndef CONFIG_ARCH_HAS_ATOMIC_OR -static inline void atomic_or(int i, atomic_t *v) -{ - int old; - int new; - - do { - old = atomic_read(v); - new = old | i; - } while (atomic_cmpxchg(v, old, new) != old); -} -#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */ - #include #ifdef CONFIG_GENERIC_ATOMIC64 #include diff --git a/lib/atomic64.c b/lib/atomic64.c index 1298c05ef528..2886ebac6567 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -102,6 +102,9 @@ EXPORT_SYMBOL(atomic64_##op##_return); ATOMIC64_OPS(add, +=) ATOMIC64_OPS(sub, -=) +ATOMIC64_OP(and, &=) +ATOMIC64_OP(or, |=) +ATOMIC64_OP(xor, ^=) #undef ATOMIC64_OPS #undef ATOMIC64_OP_RETURN -- cgit v1.2.3 From ee04139d916a61454850f3e3c687f50f891fc8bd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 23 Jul 2015 09:09:49 +0200 Subject: pinctrl/ARM: move GPIO and pinctrl deps to device tree This gets the GPIO ranges out of the driver and into the device tree where they belong. Standard DT bindings already exist for this. Since no systems with this are deployed we can just augment all device trees and the drivers at the same time and simplify the world. This also defines the array of GPIO chips related to the pin controller. 
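For orientation (this is not part of the patch), the gpio-ranges entries added to the DTS files below express the same GPIO-to-pin mapping a driver would otherwise register by hand; a rough C equivalent for the first DB8500 bank, with the pin controller device name made up for the example, would be:

#include <linux/gpio/driver.h>

/*
 * Hypothetical sketch only: gpio-ranges = <&pinctrl 0 0 32> on gpio0
 * maps chip-local offsets 0..31 to pins 0..31 of the pin controller,
 * roughly what this explicit call would set up from C.
 */
static int example_add_range(struct gpio_chip *chip)
{
	return gpiochip_add_pin_range(chip, "pinctrl-db8500", 0, 0, 32);
}

With the property in the device tree, gpiolib can set the range up when the GPIO chip is registered, so the static range tables deleted below are no longer needed.
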
Cc: arm@kernel.org Acked-by: Olof Johansson Signed-off-by: Linus Walleij --- arch/arm/boot/dts/ste-dbx5x0.dtsi | 23 ++++++++++++---------- arch/arm/boot/dts/ste-nomadik-stn8815.dtsi | 8 +++++++- drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c | 21 -------------------- drivers/pinctrl/nomadik/pinctrl-nomadik-db8540.c | 24 ----------------------- drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c | 16 --------------- drivers/pinctrl/nomadik/pinctrl-nomadik.c | 17 ---------------- drivers/pinctrl/nomadik/pinctrl-nomadik.h | 4 ---- 7 files changed, 20 insertions(+), 93 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index 853684ad7773..b8fadb1d8616 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -286,7 +286,7 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <0>; - + gpio-ranges = <&pinctrl 0 0 32>; clocks = <&prcc_pclk 1 9>; }; @@ -301,7 +301,7 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <1>; - + gpio-ranges = <&pinctrl 0 32 5>; clocks = <&prcc_pclk 1 9>; }; @@ -316,7 +316,7 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <2>; - + gpio-ranges = <&pinctrl 0 64 32>; clocks = <&prcc_pclk 3 8>; }; @@ -331,7 +331,7 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <3>; - + gpio-ranges = <&pinctrl 0 96 2>; clocks = <&prcc_pclk 3 8>; }; @@ -346,7 +346,7 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <4>; - + gpio-ranges = <&pinctrl 0 128 32>; clocks = <&prcc_pclk 3 8>; }; @@ -361,7 +361,7 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <5>; - + gpio-ranges = <&pinctrl 0 160 12>; clocks = <&prcc_pclk 3 8>; }; @@ -376,7 +376,7 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <6>; - + gpio-ranges = <&pinctrl 0 192 32>; clocks = <&prcc_pclk 2 11>; }; @@ -391,7 +391,7 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <7>; - + gpio-ranges = <&pinctrl 0 224 7>; clocks = <&prcc_pclk 2 11>; }; @@ -406,12 +406,15 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <8>; - + gpio-ranges = <&pinctrl 0 256 12>; clocks = <&prcc_pclk 5 1>; }; - pinctrl { + pinctrl: pinctrl { compatible = "stericsson,db8500-pinctrl"; + nomadik-gpio-chips = <&gpio0>, <&gpio1>, <&gpio2>, <&gpio3>, + <&gpio4>, <&gpio5>, <&gpio6>, <&gpio7>, + <&gpio8>; prcm = <&prcmu>; }; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index 9a5f2ba139b7..6846e3eb027d 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -52,6 +52,7 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <0>; + gpio-ranges = <&pinctrl 0 0 32>; clocks = <&pclk>; }; @@ -65,6 +66,7 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <1>; + gpio-ranges = <&pinctrl 0 32 32>; clocks = <&pclk>; }; @@ -78,12 +80,14 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <2>; + gpio-ranges = <&pinctrl 0 64 32>; clocks = <&pclk>; }; gpio3: gpio@101e7000 { compatible = "st,nomadik-gpio"; reg = <0x101e7000 0x80>; + ngpio = <28>; interrupt-parent = <&vica>; interrupts = <9>; interrupt-controller; @@ -91,11 +95,13 @@ gpio-controller; #gpio-cells = <2>; gpio-bank = <3>; + gpio-ranges = <&pinctrl 0 96 28>; clocks = <&pclk>; }; - pinctrl { + pinctrl: pinctrl { compatible = "stericsson,stn8815-pinctrl"; + nomadik-gpio-chips = <&gpio0>, <&gpio1>, <&gpio2>, <&gpio3>; /* Pin configurations */ uart0 { uart0_default_mux: uart0_mux { diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c b/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c index 
c74840729648..8392083514fb 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c @@ -355,25 +355,6 @@ static const struct pinctrl_pin_desc nmk_db8500_pins[] = { PINCTRL_PIN(DB8500_PIN_AC27, "GPIO267_AC27"), }; -#define DB8500_GPIO_RANGE(a, b, c) { .name = "DB8500", .id = a, .base = b, \ - .pin_base = b, .npins = c } - -/* - * This matches the 32-pin gpio chips registered by the GPIO portion. This - * cannot be const since we assign the struct gpio_chip * pointer at runtime. - */ -static struct pinctrl_gpio_range nmk_db8500_ranges[] = { - DB8500_GPIO_RANGE(0, 0, 32), - DB8500_GPIO_RANGE(1, 32, 5), - DB8500_GPIO_RANGE(2, 64, 32), - DB8500_GPIO_RANGE(3, 96, 2), - DB8500_GPIO_RANGE(4, 128, 32), - DB8500_GPIO_RANGE(5, 160, 12), - DB8500_GPIO_RANGE(6, 192, 32), - DB8500_GPIO_RANGE(7, 224, 7), - DB8500_GPIO_RANGE(8, 256, 12), -}; - /* * Read the pin group names like this: * u0_a_1 = first groups of pins for uart0 on alt function a @@ -1238,8 +1219,6 @@ static const u16 db8500_prcm_gpiocr_regs[] = { }; static const struct nmk_pinctrl_soc_data nmk_db8500_soc = { - .gpio_ranges = nmk_db8500_ranges, - .gpio_num_ranges = ARRAY_SIZE(nmk_db8500_ranges), .pins = nmk_db8500_pins, .npins = ARRAY_SIZE(nmk_db8500_pins), .functions = nmk_db8500_functions, diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik-db8540.c b/drivers/pinctrl/nomadik/pinctrl-nomadik-db8540.c index d7ba5443bae0..2860eafd1b42 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik-db8540.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik-db8540.c @@ -341,28 +341,6 @@ static const struct pinctrl_pin_desc nmk_db8540_pins[] = { PINCTRL_PIN(DB8540_PIN_D17, "GPIO267_D17"), }; -#define DB8540_GPIO_RANGE(a, b, c) { .name = "db8540", .id = a, .base = b, \ - .pin_base = b, .npins = c } - -/* - * This matches the 32-pin gpio chips registered by the GPIO portion. This - * cannot be const since we assign the struct gpio_chip * pointer at runtime. - */ -static struct pinctrl_gpio_range nmk_db8540_ranges[] = { - DB8540_GPIO_RANGE(0, 0, 18), - DB8540_GPIO_RANGE(0, 22, 7), - DB8540_GPIO_RANGE(1, 33, 6), - DB8540_GPIO_RANGE(2, 64, 4), - DB8540_GPIO_RANGE(2, 70, 18), - DB8540_GPIO_RANGE(3, 116, 12), - DB8540_GPIO_RANGE(4, 128, 32), - DB8540_GPIO_RANGE(5, 160, 9), - DB8540_GPIO_RANGE(6, 192, 23), - DB8540_GPIO_RANGE(6, 219, 5), - DB8540_GPIO_RANGE(7, 224, 9), - DB8540_GPIO_RANGE(8, 256, 12), -}; - /* * Read the pin group names like this: * u0_a_1 = first groups of pins for uart0 on alt function a @@ -1247,8 +1225,6 @@ static const u16 db8540_prcm_gpiocr_regs[] = { }; static const struct nmk_pinctrl_soc_data nmk_db8540_soc = { - .gpio_ranges = nmk_db8540_ranges, - .gpio_num_ranges = ARRAY_SIZE(nmk_db8540_ranges), .pins = nmk_db8540_pins, .npins = ARRAY_SIZE(nmk_db8540_pins), .functions = nmk_db8540_functions, diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c b/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c index 2cd71470f270..a2de3b7a57a0 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c @@ -264,20 +264,6 @@ static const struct pinctrl_pin_desc nmk_stn8815_pins[] = { PINCTRL_PIN(STN8815_PIN_J22, "GPIO123_J22"), }; -#define STN8815_GPIO_RANGE(a, b, c) { .name = "STN8815", .id = a, .base = b, \ - .pin_base = b, .npins = c } - -/* - * This matches the 32-pin gpio chips registered by the GPIO portion. This - * cannot be const since we assign the struct gpio_chip * pointer at runtime. 
- */ -static struct pinctrl_gpio_range nmk_stn8815_ranges[] = { - STN8815_GPIO_RANGE(0, 0, 32), - STN8815_GPIO_RANGE(1, 32, 32), - STN8815_GPIO_RANGE(2, 64, 32), - STN8815_GPIO_RANGE(3, 96, 28), -}; - /* * Read the pin group names like this: * u0_a_1 = first groups of pins for uart0 on alt function a @@ -342,8 +328,6 @@ static const struct nmk_function nmk_stn8815_functions[] = { }; static const struct nmk_pinctrl_soc_data nmk_stn8815_soc = { - .gpio_ranges = nmk_stn8815_ranges, - .gpio_num_ranges = ARRAY_SIZE(nmk_stn8815_ranges), .pins = nmk_stn8815_pins, .npins = ARRAY_SIZE(nmk_stn8815_pins), .functions = nmk_stn8815_functions, diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index 143d1c06078c..8c87b2ec29c1 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -2058,19 +2058,6 @@ static int nmk_pinctrl_probe(struct platform_device *pdev) } } - /* - * We need all the GPIO drivers to probe FIRST, or we will not be able - * to obtain references to the struct gpio_chip * for them, and we - * need this to proceed. - */ - for (i = 0; i < npct->soc->gpio_num_ranges; i++) { - if (!nmk_gpio_chips[npct->soc->gpio_ranges[i].id]) { - dev_warn(&pdev->dev, "GPIO chip %d not registered yet\n", i); - return -EPROBE_DEFER; - } - npct->soc->gpio_ranges[i].gc = &nmk_gpio_chips[npct->soc->gpio_ranges[i].id]->chip; - } - nmk_pinctrl_desc.pins = npct->soc->pins; nmk_pinctrl_desc.npins = npct->soc->npins; npct->dev = &pdev->dev; @@ -2081,10 +2068,6 @@ static int nmk_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(npct->pctl); } - /* We will handle a range of GPIO pins */ - for (i = 0; i < npct->soc->gpio_num_ranges; i++) - pinctrl_add_gpio_range(npct->pctl, &npct->soc->gpio_ranges[i]); - platform_set_drvdata(pdev, npct); dev_info(&pdev->dev, "initialized Nomadik pin control driver\n"); diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.h b/drivers/pinctrl/nomadik/pinctrl-nomadik.h index d8215f1e70c7..30bba2a75a58 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.h +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.h @@ -121,8 +121,6 @@ struct nmk_pingroup { /** * struct nmk_pinctrl_soc_data - Nomadik pin controller per-SoC configuration - * @gpio_ranges: An array of GPIO ranges for this SoC - * @gpio_num_ranges: The number of GPIO ranges for this SoC * @pins: An array describing all pins the pin controller affects. * All pins which are also GPIOs must be listed first within the * array, and be numbered identically to the GPIO controller's @@ -137,8 +135,6 @@ struct nmk_pingroup { * @prcm_gpiocr_registers: The array of PRCM GPIOCR registers on this SoC */ struct nmk_pinctrl_soc_data { - struct pinctrl_gpio_range *gpio_ranges; - unsigned gpio_num_ranges; const struct pinctrl_pin_desc *pins; unsigned npins; const struct nmk_function *functions; -- cgit v1.2.3 From 1447f93f22add339ee63d1a7015309d9fbfa47e8 Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Mon, 27 Jul 2015 15:06:49 +0200 Subject: ARM: net: add support for BPF_ANC | SKF_AD_PKTTYPE in ARM JIT. Signed-off-by: Nicolas Schichan Signed-off-by: David S. 
Miller --- arch/arm/net/bpf_jit_32.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index c011e2296cb1..6ff248ca36d9 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -895,6 +895,17 @@ b_epilogue: OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx); } break; + case BPF_ANC | SKF_AD_PKTTYPE: + ctx->seen |= SEEN_SKB; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + __pkt_type_offset[0]) != 1); + off = PKT_TYPE_OFFSET(); + emit(ARM_LDRB_I(r_A, r_skb, off), ctx); + emit(ARM_AND_I(r_A, r_A, PKT_TYPE_MAX), ctx); +#ifdef __BIG_ENDIAN_BITFIELD + emit(ARM_LSR_I(r_A, r_A, 5), ctx); +#endif + break; case BPF_ANC | SKF_AD_QUEUE: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, -- cgit v1.2.3 From 303249ab168f58ee52c502389b9a8046af78d142 Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Mon, 27 Jul 2015 15:06:50 +0200 Subject: ARM: net: add support for BPF_ANC | SKF_AD_PAY_OFFSET in ARM JIT. Signed-off-by: Nicolas Schichan Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6ff248ca36d9..3c73caf6e09b 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -915,6 +915,14 @@ b_epilogue: off = offsetof(struct sk_buff, queue_mapping); emit(ARM_LDRH_I(r_A, r_skb, off), ctx); break; + case BPF_ANC | SKF_AD_PAY_OFFSET: + ctx->seen |= SEEN_SKB | SEEN_CALL; + + emit(ARM_MOV_R(ARM_R0, r_skb), ctx); + emit_mov_i(ARM_R3, (unsigned int)skb_get_poff, ctx); + emit_blx_r(ARM_R3, ctx); + emit(ARM_MOV_R(r_A, ARM_R0), ctx); + break; case BPF_LDX | BPF_W | BPF_ABS: /* * load a 32bit word from struct seccomp_data. -- cgit v1.2.3 From 5bf705b43b7243c68e831ed3072db2ed00edc7fa Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Mon, 27 Jul 2015 15:06:51 +0200 Subject: ARM: net: add support for BPF_ANC | SKF_AD_HATYPE in ARM JIT. Signed-off-by: Nicolas Schichan Signed-off-by: David S. 
Miller --- arch/arm/net/bpf_jit_32.c | 22 ++++++++++++++++++++-- arch/arm/net/bpf_jit_32.h | 3 +++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 3c73caf6e09b..876060bcceeb 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -857,7 +857,9 @@ b_epilogue: emit(ARM_LDR_I(r_A, r_scratch, off), ctx); break; case BPF_ANC | SKF_AD_IFINDEX: + case BPF_ANC | SKF_AD_HATYPE: /* A = skb->dev->ifindex */ + /* A = skb->dev->type */ ctx->seen |= SEEN_SKB; off = offsetof(struct sk_buff, dev); emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); @@ -867,8 +869,24 @@ b_epilogue: BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - off = offsetof(struct net_device, ifindex); - emit(ARM_LDR_I(r_A, r_scratch, off), ctx); + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, + type) != 2); + + if (code == (BPF_ANC | SKF_AD_IFINDEX)) { + off = offsetof(struct net_device, ifindex); + emit(ARM_LDR_I(r_A, r_scratch, off), ctx); + } else { + /* + * offset of field "type" in "struct + * net_device" is above what can be + * used in the ldrh rd, [rn, #imm] + * instruction, so load the offset in + * a register and use ldrh rd, [rn, rm] + */ + off = offsetof(struct net_device, type); + emit_mov_i(ARM_R3, off, ctx); + emit(ARM_LDRH_R(r_A, r_scratch, ARM_R3), ctx); + } break; case BPF_ANC | SKF_AD_MARK: ctx->seen |= SEEN_SKB; diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index b2d7d92859d3..4b17d5ab652a 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -74,6 +74,7 @@ #define ARM_INST_LDRB_I 0x05d00000 #define ARM_INST_LDRB_R 0x07d00000 #define ARM_INST_LDRH_I 0x01d000b0 +#define ARM_INST_LDRH_R 0x019000b0 #define ARM_INST_LDR_I 0x05900000 #define ARM_INST_LDM 0x08900000 @@ -160,6 +161,8 @@ | (rm)) #define ARM_LDRH_I(rt, rn, off) (ARM_INST_LDRH_I | (rt) << 12 | (rn) << 16 \ | (((off) & 0xf0) << 4) | ((off) & 0xf)) +#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | (rt) << 12 | (rn) << 16 \ + | (rm)) #define ARM_LDM(rn, regs) (ARM_INST_LDM | (rn) << 16 | (regs)) -- cgit v1.2.3 From 261cd3ad5d49ebf128afa5397763b27fe5c3d8f8 Mon Sep 17 00:00:00 2001 From: Anand Moon Date: Tue, 9 Jun 2015 13:37:59 +0930 Subject: ARM: exynos_defconfig: Enable CONFIG_SND_SOC_ODROIDX2 for Odroid-XU3 Enable CONFIG_SND_SOC_ODROIDX2 and CONFIG_SND_SIMPLE_CARD to enable sound on Odroid-XU3 board using the max98090 audio codec. Signed-off-by: Anand Moon Reviewed-by: Lukasz Majewski Signed-off-by: Krzysztof Kozlowski Signed-off-by: Kukjin Kim --- arch/arm/configs/exynos_defconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 9504e7790288..1b7253b9f117 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -144,6 +144,8 @@ CONFIG_SND=y CONFIG_SND_SOC=y CONFIG_SND_SOC_SAMSUNG=y CONFIG_SND_SOC_SNOW=y +CONFIG_SND_SOC_ODROIDX2=y +CONFIG_SND_SIMPLE_CARD=y CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_XHCI_HCD=y -- cgit v1.2.3 From f7b5a58fd7f441f93279d964fe1ee4ff8669859e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 8 Jul 2015 15:40:46 -0700 Subject: ARM: multi_v7_defconfig: Remove old Samsung USB PHY configs The old drivers/usb/phy/phy-samsung-usb{2,3} are now deleted since were replaced by newer drivers that use the Generic PHY framework but their Kconfig options were left over in multi_v7_defconfig. 
Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski Signed-off-by: Kukjin Kim --- arch/arm/configs/multi_v7_defconfig | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 6d83a1bf0c74..ee1a67dc5ef1 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -492,8 +492,6 @@ CONFIG_USB_CHIPIDEA_HOST=y CONFIG_AB8500_USB=y CONFIG_KEYSTONE_USB_PHY=y CONFIG_OMAP_USB3=y -CONFIG_SAMSUNG_USB2PHY=y -CONFIG_SAMSUNG_USB3PHY=y CONFIG_USB_GPIO_VBUS=y CONFIG_USB_ISP1301=y CONFIG_USB_MXS_PHY=y -- cgit v1.2.3 From cf91f6d94684b8c7b81c3558ad2639179d7c1da9 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 15 Jul 2015 18:32:44 +0200 Subject: ARM: multi_v7_defconfig: Enable NTC Thermistors support The Exynos5420 Peach Pit and Exynos5800 Peach Pi Chromebooks have IIO based ADC thermistors. Enable module support for its driver and also for the needed Exynos ADC driver. Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski Signed-off-by: Kukjin Kim --- arch/arm/configs/multi_v7_defconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index ee1a67dc5ef1..2fa1e54c1ed6 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -357,6 +357,7 @@ CONFIG_POWER_RESET_SUN6I=y CONFIG_POWER_RESET_RMOBILE=y CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM95245=y +CONFIG_SENSORS_NTC_THERMISTOR=m CONFIG_THERMAL=y CONFIG_CPU_THERMAL=y CONFIG_RCAR_THERMAL=y @@ -614,6 +615,7 @@ CONFIG_MEMORY=y CONFIG_TI_AEMIF=y CONFIG_IIO=y CONFIG_AT91_ADC=m +CONFIG_EXYNOS_ADC=m CONFIG_XILINX_XADC=y CONFIG_AK8975=y CONFIG_PWM=y -- cgit v1.2.3 From 3e5e95ff468b3d19fb08a308b921a83716650b48 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 15 Jul 2015 18:32:45 +0200 Subject: ARM: exynos_defconfig: Enable NTC Thermistors support The Exynos5420 Peach Pit and Exynos5800 Peach Pi Chromebooks have IIO based ADC thermistors. Enable built-in support for its driver. Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski Signed-off-by: Kukjin Kim --- arch/arm/configs/exynos_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm') diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 1b7253b9f117..67965cedeb69 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -94,6 +94,7 @@ CONFIG_CHARGER_MAX14577=y CONFIG_CHARGER_MAX77693=y CONFIG_CHARGER_TPS65090=y CONFIG_SENSORS_LM90=y +CONFIG_SENSORS_NTC_THERMISTOR=y CONFIG_SENSORS_PWM_FAN=y CONFIG_SENSORS_INA2XX=y CONFIG_THERMAL=y -- cgit v1.2.3 From 8ae81c25cfe962a788a28e023d9a78934d807f7d Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 29 Jun 2015 22:58:46 +0100 Subject: arm: perf: Set affinity for PPI based PMUs For PPI based PMUs, we bail out early in of_pmu_irq_cfg() without setting the PMU's supported_cpus bitmap. This causes the smp_call_function_any() in armv7_probe_num_events() to fail. Set the bitmap to be all CPUs so that we properly probe PMUs that use PPIs. 
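To make the failure mode concrete, a small hypothetical sketch follows (not the actual probe code; names and return values are illustrative): smp_call_function_any() must pick an online CPU from the mask it is given, so an unpopulated supported_cpus leaves it nothing to run on and the counter probe fails.

#include <linux/cpumask.h>
#include <linux/smp.h>

static void read_num_counters(void *info)
{
	*(int *)info = 0;	/* placeholder: the real probe reads this from PMCR */
}

static int probe_num_events(const struct cpumask *supported_cpus)
{
	int nb_events = 0;

	/* With an empty mask there is no CPU to cross-call, so this errors out. */
	if (smp_call_function_any(supported_cpus, read_num_counters,
				  &nb_events, 1))
		return -ENODEV;

	return nb_events;
}
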
Fixes: cc88116da0d1 ("arm: perf: treat PMUs as CPU affine") Cc: Mark Rutland Signed-off-by: Stephen Boyd Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 54272e0be713..7d5379c1c443 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -795,8 +795,10 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); - if (irq >= 0 && irq_is_percpu(irq)) + if (irq >= 0 && irq_is_percpu(irq)) { + cpumask_setall(&pmu->supported_cpus); return 0; + } irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) -- cgit v1.2.3 From b6c084d7aa8bca21920cbbe13ad58572fa85ece6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jun 2015 13:59:01 +0100 Subject: ARM: perf: extend interrupt-affinity property for PPIs On systems containing multiple, heterogeneous clusters we need a way to associate a PMU "device" with the CPU(s) on which it exists. For PMUs that signal overflow with SPIs, this relationship is determined via the "interrupt-affinity" property, which contains a list of phandles to CPU nodes for the PMU. For PMUs using PPIs, the per-cpu nature of the interrupt isn't enough to determine the set of CPUs which actually contain the device. This patch allows the interrupt-affinity property to be specified on a PMU node irrespective of the interrupt type. For PPIs, it identifies the set of CPUs signalling the PPI in question. Tested-by: Stephen Boyd # Krait PMU Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.txt | 12 +++-- arch/arm/kernel/perf_event.c | 65 ++++++++++++++++++--------- 2 files changed, 53 insertions(+), 24 deletions(-) (limited to 'arch/arm') diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 3b5f5d1088c6..435251fa9ce0 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -26,13 +26,19 @@ Required properties: Optional properties: -- interrupt-affinity : Valid only when using SPIs, specifies a list of phandles - to CPU nodes corresponding directly to the affinity of +- interrupt-affinity : When using SPIs, specifies a list of phandles to CPU + nodes corresponding directly to the affinity of the SPIs listed in the interrupts property. - This property should be present when there is more than + When using a PPI, specifies a list of phandles to CPU + nodes corresponding to the set of CPUs which have + a PMU of this type signalling the PPI listed in the + interrupts property. + + This property should be present when there is more than a single SPI. + - qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd events. 
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 7d5379c1c443..5a8f17bfcc60 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -790,32 +790,39 @@ static int probe_current_pmu(struct arm_pmu *pmu, static int of_pmu_irq_cfg(struct arm_pmu *pmu) { - int i, irq, *irqs; + int *irqs, i = 0; + bool using_spi = false; struct platform_device *pdev = pmu->plat_device; - /* Don't bother with PPIs; they're already affine */ - irq = platform_get_irq(pdev, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - cpumask_setall(&pmu->supported_cpus); - return 0; - } - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) return -ENOMEM; - for (i = 0; i < pdev->num_resources; ++i) { + do { struct device_node *dn; - int cpu; + int cpu, irq; - dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", - i); - if (!dn) { - pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", - of_node_full_name(pdev->dev.of_node), i); + /* See if we have an affinity entry */ + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i); + if (!dn) break; + + /* Check the IRQ type and prohibit a mix of PPIs and SPIs */ + irq = platform_get_irq(pdev, i); + if (irq >= 0) { + bool spi = !irq_is_percpu(irq); + + if (i > 0 && spi != using_spi) { + pr_err("PPI/SPI IRQ type mismatch for %s!\n", + dn->name); + kfree(irqs); + return -EINVAL; + } + + using_spi = spi; } + /* Now look up the logical CPU number */ for_each_possible_cpu(cpu) if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) break; @@ -824,20 +831,36 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) pr_warn("Failed to find logical CPU for %s\n", dn->name); of_node_put(dn); + cpumask_setall(&pmu->supported_cpus); break; } of_node_put(dn); - irqs[i] = cpu; + /* For SPIs, we need to track the affinity per IRQ */ + if (using_spi) { + if (i >= pdev->num_resources) { + of_node_put(dn); + break; + } + + irqs[i] = cpu; + } + + /* Keep track of the CPUs containing this PMU type */ cpumask_set_cpu(cpu, &pmu->supported_cpus); - } + of_node_put(dn); + i++; + } while (1); - if (i == pdev->num_resources) { + /* If we didn't manage to parse anything, claim to support all CPUs */ + if (cpumask_weight(&pmu->supported_cpus) == 0) + cpumask_setall(&pmu->supported_cpus); + + /* If we matched up the IRQ affinities, use them to route the SPIs */ + if (using_spi && i == pdev->num_resources) pmu->irq_affinity = irqs; - } else { + else kfree(irqs); - cpumask_setall(&pmu->supported_cpus); - } return 0; } -- cgit v1.2.3 From bc1e3c4687df62a1f2ba1b6be11efbeb76145366 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 30 Jun 2015 13:56:57 +0100 Subject: ARM: perf: replace arch_find_n_match_cpu_physical_id with of_cpu_device_node_get arch_find_n_match_cpu_physical_id parses the device tree to get the device node for a given logical cpu index. However, since ARM PMUs get probed after the CPU device nodes are stashed while registering the cpus, we can use of_cpu_device_node_get to avoid another DT parse. This patch replaces arch_find_n_match_cpu_physical_id with of_cpu_device_node_get to reuse the stashed value directly instead. 
Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 5a8f17bfcc60..1cb40651d783 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -824,7 +824,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) /* Now look up the logical CPU number */ for_each_possible_cpu(cpu) - if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) + if (dn == of_cpu_device_node_get(cpu)) break; if (cpu >= nr_cpu_ids) { -- cgit v1.2.3 From fa8ad7889d83bcf0a6cdbf6d3622f3ec019cde14 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 6 Jul 2015 12:23:53 +0100 Subject: arm: perf: factor arm_pmu core out to drivers To enable sharing of the arm_pmu code with arm64, this patch factors it out to drivers/perf/. A new drivers/perf directory is added for performance monitor drivers to live under. MAINTAINERS is updated accordingly. Files added previously without a corresponsing MAINTAINERS update (perf_regs.c, perf_callchain.c, and perf_event.h) are also added. Cc: Arnaldo Carvalho de Melo Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Linus Walleij Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Russell King Cc: Will Deacon Signed-off-by: Mark Rutland [will: augmented Kconfig help slightly] Signed-off-by: Will Deacon --- MAINTAINERS | 6 +- arch/arm/Kconfig | 8 +- arch/arm/include/asm/pmu.h | 154 ------ arch/arm/kernel/Makefile | 3 +- arch/arm/kernel/perf_event.c | 921 ------------------------------------ arch/arm/kernel/perf_event_v6.c | 2 +- arch/arm/kernel/perf_event_v7.c | 2 +- arch/arm/kernel/perf_event_xscale.c | 2 +- arch/arm/mach-ux500/cpu-db8500.c | 2 +- drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/perf/Kconfig | 15 + drivers/perf/Makefile | 1 + drivers/perf/arm_pmu.c | 921 ++++++++++++++++++++++++++++++++++++ include/linux/perf/arm_pmu.h | 154 ++++++ 15 files changed, 1105 insertions(+), 1089 deletions(-) delete mode 100644 arch/arm/include/asm/pmu.h delete mode 100644 arch/arm/kernel/perf_event.c create mode 100644 drivers/perf/Kconfig create mode 100644 drivers/perf/Makefile create mode 100644 drivers/perf/arm_pmu.c create mode 100644 include/linux/perf/arm_pmu.h (limited to 'arch/arm') diff --git a/MAINTAINERS b/MAINTAINERS index fd6078443083..485c92ced47d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -799,11 +799,13 @@ F: arch/arm/include/asm/floppy.h ARM PMU PROFILING AND DEBUGGING M: Will Deacon S: Maintained -F: arch/arm/kernel/perf_event* +F: arch/arm/kernel/perf_* F: arch/arm/oprofile/common.c -F: arch/arm/include/asm/pmu.h F: arch/arm/kernel/hw_breakpoint.c F: arch/arm/include/asm/hw_breakpoint.h +F: arch/arm/include/asm/perf_event.h +F: drivers/perf/arm_pmu.c +F: include/linux/perf/arm_pmu.h ARM PORT M: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1c5021002fe4..4f7bc3d4b186 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1701,12 +1701,8 @@ config HIGHPTE user-space 2nd level page tables to reside in high memory. config HW_PERF_EVENTS - bool "Enable hardware performance counter support for perf events" - depends on PERF_EVENTS - default y - help - Enable hardware performance counter support for perf events. If - disabled, perf events will use software events only. 
+ def_bool y + depends on ARM_PMU config SYS_SUPPORTS_HUGETLBFS def_bool y diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h deleted file mode 100644 index 3fc87dfd77e6..000000000000 --- a/arch/arm/include/asm/pmu.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * linux/arch/arm/include/asm/pmu.h - * - * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#ifndef __ARM_PMU_H__ -#define __ARM_PMU_H__ - -#include -#include - -#include - -/* - * struct arm_pmu_platdata - ARM PMU platform data - * - * @handle_irq: an optional handler which will be called from the - * interrupt and passed the address of the low level handler, - * and can be used to implement any platform specific handling - * before or after calling it. - */ -struct arm_pmu_platdata { - irqreturn_t (*handle_irq)(int irq, void *dev, - irq_handler_t pmu_handler); -}; - -#ifdef CONFIG_HW_PERF_EVENTS - -/* - * The ARMv7 CPU PMU supports up to 32 event counters. - */ -#define ARMPMU_MAX_HWEVENTS 32 - -#define HW_OP_UNSUPPORTED 0xFFFF -#define C(_x) PERF_COUNT_HW_CACHE_##_x -#define CACHE_OP_UNSUPPORTED 0xFFFF - -#define PERF_MAP_ALL_UNSUPPORTED \ - [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED - -#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ -[0 ... C(MAX) - 1] = { \ - [0 ... C(OP_MAX) - 1] = { \ - [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ - }, \ -} - -/* The events for a given PMU register set. */ -struct pmu_hw_events { - /* - * The events that are active on the PMU for the given index. - */ - struct perf_event *events[ARMPMU_MAX_HWEVENTS]; - - /* - * A 1 bit for an index indicates that the counter is being used for - * an event. A 0 means that the counter can be used. - */ - DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS); - - /* - * Hardware lock to serialize accesses to PMU registers. Needed for the - * read/modify/write sequences. - */ - raw_spinlock_t pmu_lock; - - /* - * When using percpu IRQs, we need a percpu dev_id. Place it here as we - * already have to allocate this struct per cpu. 
- */ - struct arm_pmu *percpu_pmu; -}; - -struct arm_pmu { - struct pmu pmu; - cpumask_t active_irqs; - cpumask_t supported_cpus; - int *irq_affinity; - char *name; - irqreturn_t (*handle_irq)(int irq_num, void *dev); - void (*enable)(struct perf_event *event); - void (*disable)(struct perf_event *event); - int (*get_event_idx)(struct pmu_hw_events *hw_events, - struct perf_event *event); - void (*clear_event_idx)(struct pmu_hw_events *hw_events, - struct perf_event *event); - int (*set_event_filter)(struct hw_perf_event *evt, - struct perf_event_attr *attr); - u32 (*read_counter)(struct perf_event *event); - void (*write_counter)(struct perf_event *event, u32 val); - void (*start)(struct arm_pmu *); - void (*stop)(struct arm_pmu *); - void (*reset)(void *); - int (*request_irq)(struct arm_pmu *, irq_handler_t handler); - void (*free_irq)(struct arm_pmu *); - int (*map_event)(struct perf_event *event); - int num_events; - atomic_t active_events; - struct mutex reserve_mutex; - u64 max_period; - struct platform_device *plat_device; - struct pmu_hw_events __percpu *hw_events; - struct notifier_block hotplug_nb; -}; - -#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) - -int armpmu_register(struct arm_pmu *armpmu, int type); - -u64 armpmu_event_update(struct perf_event *event); - -int armpmu_event_set_period(struct perf_event *event); - -int armpmu_map_event(struct perf_event *event, - const unsigned (*event_map)[PERF_COUNT_HW_MAX], - const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u32 raw_event_mask); - -struct pmu_probe_info { - unsigned int cpuid; - unsigned int mask; - int (*init)(struct arm_pmu *); -}; - -#define PMU_PROBE(_cpuid, _mask, _fn) \ -{ \ - .cpuid = (_cpuid), \ - .mask = (_mask), \ - .init = (_fn), \ -} - -#define ARM_PMU_PROBE(_cpuid, _fn) \ - PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn) - -#define ARM_PMU_XSCALE_MASK ((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK) - -#define XSCALE_PMU_PROBE(_version, _fn) \ - PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn) - -int arm_pmu_device_probe(struct platform_device *pdev, - const struct of_device_id *of_table, - const struct pmu_probe_info *probe_table); - -#endif /* CONFIG_HW_PERF_EVENTS */ - -#endif /* __ARM_PMU_H__ */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index e69f7a19735d..fcb25c1c5c21 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -71,8 +71,7 @@ obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o \ - perf_event_xscale.o perf_event_v6.o \ +obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_xscale.o perf_event_v6.o \ perf_event_v7.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c deleted file mode 100644 index 1cb40651d783..000000000000 --- a/arch/arm/kernel/perf_event.c +++ /dev/null @@ -1,921 +0,0 @@ -#undef DEBUG - -/* - * ARM performance counter support. - * - * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * Copyright (C) 2010 ARM Ltd., Will Deacon - * - * This code is based on the sparc64 perf event code, which is in turn based - * on the x86 code. 
- */ -#define pr_fmt(fmt) "hw perfevents: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int -armpmu_map_cache_event(const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u64 config) -{ - unsigned int cache_type, cache_op, cache_result, ret; - - cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; - - if (ret == CACHE_OP_UNSUPPORTED) - return -ENOENT; - - return ret; -} - -static int -armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) -{ - int mapping; - - if (config >= PERF_COUNT_HW_MAX) - return -EINVAL; - - mapping = (*event_map)[config]; - return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; -} - -static int -armpmu_map_raw_event(u32 raw_event_mask, u64 config) -{ - return (int)(config & raw_event_mask); -} - -int -armpmu_map_event(struct perf_event *event, - const unsigned (*event_map)[PERF_COUNT_HW_MAX], - const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u32 raw_event_mask) -{ - u64 config = event->attr.config; - int type = event->attr.type; - - if (type == event->pmu->type) - return armpmu_map_raw_event(raw_event_mask, config); - - switch (type) { - case PERF_TYPE_HARDWARE: - return armpmu_map_hw_event(event_map, config); - case PERF_TYPE_HW_CACHE: - return armpmu_map_cache_event(cache_map, config); - case PERF_TYPE_RAW: - return armpmu_map_raw_event(raw_event_mask, config); - } - - return -ENOENT; -} - -int armpmu_event_set_period(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - s64 left = local64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int ret = 0; - - if (unlikely(left <= -period)) { - left = period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - /* - * Limit the maximum period to prevent the counter value - * from overtaking the one we are about to program. In - * effect we are reducing max_period to account for - * interrupt latency (and we are being very conservative). 
- */ - if (left > (armpmu->max_period >> 1)) - left = armpmu->max_period >> 1; - - local64_set(&hwc->prev_count, (u64)-left); - - armpmu->write_counter(event, (u64)(-left) & 0xffffffff); - - perf_event_update_userpage(event); - - return ret; -} - -u64 armpmu_event_update(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - u64 delta, prev_raw_count, new_raw_count; - -again: - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = armpmu->read_counter(event); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - delta = (new_raw_count - prev_raw_count) & armpmu->max_period; - - local64_add(delta, &event->count); - local64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static void -armpmu_read(struct perf_event *event) -{ - armpmu_event_update(event); -} - -static void -armpmu_stop(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - - /* - * ARM pmu always has to update the counter, so ignore - * PERF_EF_UPDATE, see comments in armpmu_start(). - */ - if (!(hwc->state & PERF_HES_STOPPED)) { - armpmu->disable(event); - armpmu_event_update(event); - hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; - } -} - -static void armpmu_start(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - - /* - * ARM pmu always has to reprogram the period, so ignore - * PERF_EF_RELOAD, see the comment below. - */ - if (flags & PERF_EF_RELOAD) - WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); - - hwc->state = 0; - /* - * Set the period again. Some counters can't be stopped, so when we - * were stopped we simply disabled the IRQ source and the counter - * may have been left counting. If we don't do this step then we may - * get an interrupt too soon or *way* too late if the overflow has - * happened since disabling. - */ - armpmu_event_set_period(event); - armpmu->enable(event); -} - -static void -armpmu_del(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - armpmu_stop(event, PERF_EF_UPDATE); - hw_events->events[idx] = NULL; - clear_bit(idx, hw_events->used_mask); - if (armpmu->clear_event_idx) - armpmu->clear_event_idx(hw_events, event); - - perf_event_update_userpage(event); -} - -static int -armpmu_add(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - struct hw_perf_event *hwc = &event->hw; - int idx; - int err = 0; - - /* An event following a process won't be stopped earlier */ - if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) - return -ENOENT; - - perf_pmu_disable(event->pmu); - - /* If we don't have a space for the counter then finish early. */ - idx = armpmu->get_event_idx(hw_events, event); - if (idx < 0) { - err = idx; - goto out; - } - - /* - * If there is an event in the counter we are going to use then make - * sure it is disabled. 
- */ - event->hw.idx = idx; - armpmu->disable(event); - hw_events->events[idx] = event; - - hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; - if (flags & PERF_EF_START) - armpmu_start(event, PERF_EF_RELOAD); - - /* Propagate our changes to the userspace mapping. */ - perf_event_update_userpage(event); - -out: - perf_pmu_enable(event->pmu); - return err; -} - -static int -validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, - struct perf_event *event) -{ - struct arm_pmu *armpmu; - - if (is_software_event(event)) - return 1; - - /* - * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The - * core perf code won't check that the pmu->ctx == leader->ctx - * until after pmu->event_init(event). - */ - if (event->pmu != pmu) - return 0; - - if (event->state < PERF_EVENT_STATE_OFF) - return 1; - - if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) - return 1; - - armpmu = to_arm_pmu(event->pmu); - return armpmu->get_event_idx(hw_events, event) >= 0; -} - -static int -validate_group(struct perf_event *event) -{ - struct perf_event *sibling, *leader = event->group_leader; - struct pmu_hw_events fake_pmu; - - /* - * Initialise the fake PMU. We only need to populate the - * used_mask for the purposes of validation. - */ - memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask)); - - if (!validate_event(event->pmu, &fake_pmu, leader)) - return -EINVAL; - - list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(event->pmu, &fake_pmu, sibling)) - return -EINVAL; - } - - if (!validate_event(event->pmu, &fake_pmu, event)) - return -EINVAL; - - return 0; -} - -static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) -{ - struct arm_pmu *armpmu; - struct platform_device *plat_device; - struct arm_pmu_platdata *plat; - int ret; - u64 start_clock, finish_clock; - - /* - * we request the IRQ with a (possibly percpu) struct arm_pmu**, but - * the handlers expect a struct arm_pmu*. The percpu_irq framework will - * do any necessary shifting, we just need to perform the first - * dereference. 
- */ - armpmu = *(void **)dev; - plat_device = armpmu->plat_device; - plat = dev_get_platdata(&plat_device->dev); - - start_clock = sched_clock(); - if (plat && plat->handle_irq) - ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq); - else - ret = armpmu->handle_irq(irq, armpmu); - finish_clock = sched_clock(); - - perf_sample_event_took(finish_clock - start_clock); - return ret; -} - -static void -armpmu_release_hardware(struct arm_pmu *armpmu) -{ - armpmu->free_irq(armpmu); -} - -static int -armpmu_reserve_hardware(struct arm_pmu *armpmu) -{ - int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); - if (err) { - armpmu_release_hardware(armpmu); - return err; - } - - return 0; -} - -static void -hw_perf_event_destroy(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - atomic_t *active_events = &armpmu->active_events; - struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; - - if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { - armpmu_release_hardware(armpmu); - mutex_unlock(pmu_reserve_mutex); - } -} - -static int -event_requires_mode_exclusion(struct perf_event_attr *attr) -{ - return attr->exclude_idle || attr->exclude_user || - attr->exclude_kernel || attr->exclude_hv; -} - -static int -__hw_perf_event_init(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int mapping; - - mapping = armpmu->map_event(event); - - if (mapping < 0) { - pr_debug("event %x:%llx not supported\n", event->attr.type, - event->attr.config); - return mapping; - } - - /* - * We don't assign an index until we actually place the event onto - * hardware. Use -1 to signify that we haven't decided where to put it - * yet. For SMP systems, each core has it's own PMU so we can't do any - * clever allocation or constraints checking at this point. - */ - hwc->idx = -1; - hwc->config_base = 0; - hwc->config = 0; - hwc->event_base = 0; - - /* - * Check whether we need to exclude the counter from certain modes. - */ - if ((!armpmu->set_event_filter || - armpmu->set_event_filter(hwc, &event->attr)) && - event_requires_mode_exclusion(&event->attr)) { - pr_debug("ARM performance counters do not support " - "mode exclusion\n"); - return -EOPNOTSUPP; - } - - /* - * Store the event encoding into the config_base field. - */ - hwc->config_base |= (unsigned long)mapping; - - if (!is_sampling_event(event)) { - /* - * For non-sampling runs, limit the sample_period to half - * of the counter width. That way, the new counter value - * is far less likely to overtake the previous one unless - * you have some serious IRQ latency issues. - */ - hwc->sample_period = armpmu->max_period >> 1; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - } - - if (event->group_leader != event) { - if (validate_group(event) != 0) - return -EINVAL; - } - - return 0; -} - -static int armpmu_event_init(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - int err = 0; - atomic_t *active_events = &armpmu->active_events; - - /* - * Reject CPU-affine events for CPUs that are of a different class to - * that which this PMU handles. Process-following events (where - * event->cpu == -1) can be migrated between CPUs, and thus we have to - * reject them later (in armpmu_add) if they're scheduled on a - * different class of CPU. 
- */ - if (event->cpu != -1 && - !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) - return -ENOENT; - - /* does not support taken branch sampling */ - if (has_branch_stack(event)) - return -EOPNOTSUPP; - - if (armpmu->map_event(event) == -ENOENT) - return -ENOENT; - - event->destroy = hw_perf_event_destroy; - - if (!atomic_inc_not_zero(active_events)) { - mutex_lock(&armpmu->reserve_mutex); - if (atomic_read(active_events) == 0) - err = armpmu_reserve_hardware(armpmu); - - if (!err) - atomic_inc(active_events); - mutex_unlock(&armpmu->reserve_mutex); - } - - if (err) - return err; - - err = __hw_perf_event_init(event); - if (err) - hw_perf_event_destroy(event); - - return err; -} - -static void armpmu_enable(struct pmu *pmu) -{ - struct arm_pmu *armpmu = to_arm_pmu(pmu); - struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); - - /* For task-bound events we may be called on other CPUs */ - if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) - return; - - if (enabled) - armpmu->start(armpmu); -} - -static void armpmu_disable(struct pmu *pmu) -{ - struct arm_pmu *armpmu = to_arm_pmu(pmu); - - /* For task-bound events we may be called on other CPUs */ - if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) - return; - - armpmu->stop(armpmu); -} - -/* - * In heterogeneous systems, events are specific to a particular - * microarchitecture, and aren't suitable for another. Thus, only match CPUs of - * the same microarchitecture. - */ -static int armpmu_filter_match(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - unsigned int cpu = smp_processor_id(); - return cpumask_test_cpu(cpu, &armpmu->supported_cpus); -} - -static void armpmu_init(struct arm_pmu *armpmu) -{ - atomic_set(&armpmu->active_events, 0); - mutex_init(&armpmu->reserve_mutex); - - armpmu->pmu = (struct pmu) { - .pmu_enable = armpmu_enable, - .pmu_disable = armpmu_disable, - .event_init = armpmu_event_init, - .add = armpmu_add, - .del = armpmu_del, - .start = armpmu_start, - .stop = armpmu_stop, - .read = armpmu_read, - .filter_match = armpmu_filter_match, - }; -} - -int armpmu_register(struct arm_pmu *armpmu, int type) -{ - armpmu_init(armpmu); - pr_info("enabled with %s PMU driver, %d counters available\n", - armpmu->name, armpmu->num_events); - return perf_pmu_register(&armpmu->pmu, armpmu->name, type); -} - -/* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *__oprofile_cpu_pmu; - -/* - * Despite the names, these two functions are CPU-specific and are used - * by the OProfile/perf code. 
- */ -const char *perf_pmu_name(void) -{ - if (!__oprofile_cpu_pmu) - return NULL; - - return __oprofile_cpu_pmu->name; -} -EXPORT_SYMBOL_GPL(perf_pmu_name); - -int perf_num_counters(void) -{ - int max_events = 0; - - if (__oprofile_cpu_pmu != NULL) - max_events = __oprofile_cpu_pmu->num_events; - - return max_events; -} -EXPORT_SYMBOL_GPL(perf_num_counters); - -static void cpu_pmu_enable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - enable_percpu_irq(irq, IRQ_TYPE_NONE); -} - -static void cpu_pmu_disable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - disable_percpu_irq(irq); -} - -static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) -{ - int i, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); - free_percpu_irq(irq, &hw_events->percpu_pmu); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - if (cpu_pmu->irq_affinity) - cpu = cpu_pmu->irq_affinity[i]; - - if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - } - } -} - -static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) -{ - int i, err, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - if (!pmu_device) - return -ENODEV; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { - pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); - return 0; - } - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &hw_events->percpu_pmu); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; - - if (cpu_pmu->irq_affinity) - cpu = cpu_pmu->irq_affinity[i]; - - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - continue; - } - - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - - cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); - } - } - - return 0; -} - -/* - * PMU hardware loses all context when a CPU goes offline. - * When a CPU is hotplugged back in, since some hardware registers are - * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading - * junk values out of them. 
- */ -static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, - void *hcpu) -{ - int cpu = (unsigned long)hcpu; - struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); - - if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) - return NOTIFY_DONE; - - if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) - return NOTIFY_DONE; - - if (pmu->reset) - pmu->reset(pmu); - else - return NOTIFY_DONE; - - return NOTIFY_OK; -} - -static int cpu_pmu_init(struct arm_pmu *cpu_pmu) -{ - int err; - int cpu; - struct pmu_hw_events __percpu *cpu_hw_events; - - cpu_hw_events = alloc_percpu(struct pmu_hw_events); - if (!cpu_hw_events) - return -ENOMEM; - - cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; - err = register_cpu_notifier(&cpu_pmu->hotplug_nb); - if (err) - goto out_hw_events; - - for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); - raw_spin_lock_init(&events->pmu_lock); - events->percpu_pmu = cpu_pmu; - } - - cpu_pmu->hw_events = cpu_hw_events; - cpu_pmu->request_irq = cpu_pmu_request_irq; - cpu_pmu->free_irq = cpu_pmu_free_irq; - - /* Ensure the PMU has sane values out of reset. */ - if (cpu_pmu->reset) - on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, - cpu_pmu, 1); - - /* If no interrupts available, set the corresponding capability flag */ - if (!platform_get_irq(cpu_pmu->plat_device, 0)) - cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - - return 0; - -out_hw_events: - free_percpu(cpu_hw_events); - return err; -} - -static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) -{ - unregister_cpu_notifier(&cpu_pmu->hotplug_nb); - free_percpu(cpu_pmu->hw_events); -} - -/* - * CPU PMU identification and probing. - */ -static int probe_current_pmu(struct arm_pmu *pmu, - const struct pmu_probe_info *info) -{ - int cpu = get_cpu(); - unsigned int cpuid = read_cpuid_id(); - int ret = -ENODEV; - - pr_info("probing PMU on CPU %d\n", cpu); - - for (; info->init != NULL; info++) { - if ((cpuid & info->mask) != info->cpuid) - continue; - ret = info->init(pmu); - break; - } - - put_cpu(); - return ret; -} - -static int of_pmu_irq_cfg(struct arm_pmu *pmu) -{ - int *irqs, i = 0; - bool using_spi = false; - struct platform_device *pdev = pmu->plat_device; - - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - if (!irqs) - return -ENOMEM; - - do { - struct device_node *dn; - int cpu, irq; - - /* See if we have an affinity entry */ - dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i); - if (!dn) - break; - - /* Check the IRQ type and prohibit a mix of PPIs and SPIs */ - irq = platform_get_irq(pdev, i); - if (irq >= 0) { - bool spi = !irq_is_percpu(irq); - - if (i > 0 && spi != using_spi) { - pr_err("PPI/SPI IRQ type mismatch for %s!\n", - dn->name); - kfree(irqs); - return -EINVAL; - } - - using_spi = spi; - } - - /* Now look up the logical CPU number */ - for_each_possible_cpu(cpu) - if (dn == of_cpu_device_node_get(cpu)) - break; - - if (cpu >= nr_cpu_ids) { - pr_warn("Failed to find logical CPU for %s\n", - dn->name); - of_node_put(dn); - cpumask_setall(&pmu->supported_cpus); - break; - } - of_node_put(dn); - - /* For SPIs, we need to track the affinity per IRQ */ - if (using_spi) { - if (i >= pdev->num_resources) { - of_node_put(dn); - break; - } - - irqs[i] = cpu; - } - - /* Keep track of the CPUs containing this PMU type */ - cpumask_set_cpu(cpu, &pmu->supported_cpus); - of_node_put(dn); - i++; - } while (1); - - /* If we didn't manage to parse anything, claim to support all CPUs */ - if 
(cpumask_weight(&pmu->supported_cpus) == 0) - cpumask_setall(&pmu->supported_cpus); - - /* If we matched up the IRQ affinities, use them to route the SPIs */ - if (using_spi && i == pdev->num_resources) - pmu->irq_affinity = irqs; - else - kfree(irqs); - - return 0; -} - -int arm_pmu_device_probe(struct platform_device *pdev, - const struct of_device_id *of_table, - const struct pmu_probe_info *probe_table) -{ - const struct of_device_id *of_id; - const int (*init_fn)(struct arm_pmu *); - struct device_node *node = pdev->dev.of_node; - struct arm_pmu *pmu; - int ret = -ENODEV; - - pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); - if (!pmu) { - pr_info("failed to allocate PMU device!\n"); - return -ENOMEM; - } - - if (!__oprofile_cpu_pmu) - __oprofile_cpu_pmu = pmu; - - pmu->plat_device = pdev; - - if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { - init_fn = of_id->data; - - ret = of_pmu_irq_cfg(pmu); - if (!ret) - ret = init_fn(pmu); - } else { - ret = probe_current_pmu(pmu, probe_table); - cpumask_setall(&pmu->supported_cpus); - } - - if (ret) { - pr_info("failed to probe PMU!\n"); - goto out_free; - } - - ret = cpu_pmu_init(pmu); - if (ret) - goto out_free; - - ret = armpmu_register(pmu, -1); - if (ret) - goto out_destroy; - - return 0; - -out_destroy: - cpu_pmu_destroy(pmu); -out_free: - pr_info("failed to register PMU devices!\n"); - kfree(pmu); - return ret; -} diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 09f83e414a72..09413e7b49aa 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -34,9 +34,9 @@ #include #include -#include #include +#include #include enum armv6_perf_types { diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index f9b37f876e20..126dc679b230 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -21,11 +21,11 @@ #include #include #include -#include #include #include "../vfp/vfpinstr.h" #include +#include #include /* diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 304d056d5b25..aa0499e2eef7 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -16,9 +16,9 @@ #include #include -#include #include +#include #include enum xscale_perf_types { diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 16913800bbf9..5578dc1ab52b 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include #include "setup.h" diff --git a/drivers/Kconfig b/drivers/Kconfig index 6e973b8e3a3b..3497485f5eab 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -176,6 +176,8 @@ source "drivers/powercap/Kconfig" source "drivers/mcb/Kconfig" +source "drivers/perf/Kconfig" + source "drivers/ras/Kconfig" source "drivers/thunderbolt/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index b64b49f6e01b..f245f2291b8a 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -161,6 +161,7 @@ obj-$(CONFIG_NTB) += ntb/ obj-$(CONFIG_FMC) += fmc/ obj-$(CONFIG_POWERCAP) += powercap/ obj-$(CONFIG_MCB) += mcb/ +obj-$(CONFIG_PERF_EVENTS) += perf/ obj-$(CONFIG_RAS) += ras/ obj-$(CONFIG_THUNDERBOLT) += thunderbolt/ obj-$(CONFIG_CORESIGHT) += hwtracing/coresight/ diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig new file mode 100644 index 000000000000..d9de36ee165d --- /dev/null +++ b/drivers/perf/Kconfig @@ -0,0 +1,15 @@ +# 
+# Performance Monitor Drivers +# + +menu "Performance monitor support" + +config ARM_PMU + depends on PERF_EVENTS && ARM + bool "ARM PMU framework" + default y + help + Say y if you want to use CPU performance monitors on ARM-based + systems. + +endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile new file mode 100644 index 000000000000..acd2397ded94 --- /dev/null +++ b/drivers/perf/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_ARM_PMU) += arm_pmu.o diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c new file mode 100644 index 000000000000..2365a32a595e --- /dev/null +++ b/drivers/perf/arm_pmu.c @@ -0,0 +1,921 @@ +#undef DEBUG + +/* + * ARM performance counter support. + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * Copyright (C) 2010 ARM Ltd., Will Deacon + * + * This code is based on the sparc64 perf event code, which is in turn based + * on the x86 code. + */ +#define pr_fmt(fmt) "hw perfevents: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static int +armpmu_map_cache_event(const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u64 config) +{ + unsigned int cache_type, cache_op, cache_result, ret; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; + + if (ret == CACHE_OP_UNSUPPORTED) + return -ENOENT; + + return ret; +} + +static int +armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) +{ + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + + mapping = (*event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; +} + +static int +armpmu_map_raw_event(u32 raw_event_mask, u64 config) +{ + return (int)(config & raw_event_mask); +} + +int +armpmu_map_event(struct perf_event *event, + const unsigned (*event_map)[PERF_COUNT_HW_MAX], + const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u32 raw_event_mask) +{ + u64 config = event->attr.config; + int type = event->attr.type; + + if (type == event->pmu->type) + return armpmu_map_raw_event(raw_event_mask, config); + + switch (type) { + case PERF_TYPE_HARDWARE: + return armpmu_map_hw_event(event_map, config); + case PERF_TYPE_HW_CACHE: + return armpmu_map_cache_event(cache_map, config); + case PERF_TYPE_RAW: + return armpmu_map_raw_event(raw_event_mask, config); + } + + return -ENOENT; +} + +int armpmu_event_set_period(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + /* + * Limit the maximum period to prevent the counter value + * from overtaking the one we are about to program. 
In + * effect we are reducing max_period to account for + * interrupt latency (and we are being very conservative). + */ + if (left > (armpmu->max_period >> 1)) + left = armpmu->max_period >> 1; + + local64_set(&hwc->prev_count, (u64)-left); + + armpmu->write_counter(event, (u64)(-left) & 0xffffffff); + + perf_event_update_userpage(event); + + return ret; +} + +u64 armpmu_event_update(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev_raw_count, new_raw_count; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = armpmu->read_counter(event); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count - prev_raw_count) & armpmu->max_period; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static void +armpmu_read(struct perf_event *event) +{ + armpmu_event_update(event); +} + +static void +armpmu_stop(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + /* + * ARM pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in armpmu_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + armpmu->disable(event); + armpmu_event_update(event); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static void armpmu_start(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + /* + * ARM pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + /* + * Set the period again. Some counters can't be stopped, so when we + * were stopped we simply disabled the IRQ source and the counter + * may have been left counting. If we don't do this step then we may + * get an interrupt too soon or *way* too late if the overflow has + * happened since disabling. + */ + armpmu_event_set_period(event); + armpmu->enable(event); +} + +static void +armpmu_del(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + armpmu_stop(event, PERF_EF_UPDATE); + hw_events->events[idx] = NULL; + clear_bit(idx, hw_events->used_mask); + if (armpmu->clear_event_idx) + armpmu->clear_event_idx(hw_events, event); + + perf_event_update_userpage(event); +} + +static int +armpmu_add(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + int err = 0; + + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return -ENOENT; + + perf_pmu_disable(event->pmu); + + /* If we don't have a space for the counter then finish early. */ + idx = armpmu->get_event_idx(hw_events, event); + if (idx < 0) { + err = idx; + goto out; + } + + /* + * If there is an event in the counter we are going to use then make + * sure it is disabled. 
+ */ + event->hw.idx = idx; + armpmu->disable(event); + hw_events->events[idx] = event; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + armpmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + +out: + perf_pmu_enable(event->pmu); + return err; +} + +static int +validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, + struct perf_event *event) +{ + struct arm_pmu *armpmu; + + if (is_software_event(event)) + return 1; + + /* + * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The + * core perf code won't check that the pmu->ctx == leader->ctx + * until after pmu->event_init(event). + */ + if (event->pmu != pmu) + return 0; + + if (event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) + return 1; + + armpmu = to_arm_pmu(event->pmu); + return armpmu->get_event_idx(hw_events, event) >= 0; +} + +static int +validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct pmu_hw_events fake_pmu; + + /* + * Initialise the fake PMU. We only need to populate the + * used_mask for the purposes of validation. + */ + memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask)); + + if (!validate_event(event->pmu, &fake_pmu, leader)) + return -EINVAL; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!validate_event(event->pmu, &fake_pmu, sibling)) + return -EINVAL; + } + + if (!validate_event(event->pmu, &fake_pmu, event)) + return -EINVAL; + + return 0; +} + +static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) +{ + struct arm_pmu *armpmu; + struct platform_device *plat_device; + struct arm_pmu_platdata *plat; + int ret; + u64 start_clock, finish_clock; + + /* + * we request the IRQ with a (possibly percpu) struct arm_pmu**, but + * the handlers expect a struct arm_pmu*. The percpu_irq framework will + * do any necessary shifting, we just need to perform the first + * dereference. 
+ */ + armpmu = *(void **)dev; + plat_device = armpmu->plat_device; + plat = dev_get_platdata(&plat_device->dev); + + start_clock = sched_clock(); + if (plat && plat->handle_irq) + ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq); + else + ret = armpmu->handle_irq(irq, armpmu); + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return ret; +} + +static void +armpmu_release_hardware(struct arm_pmu *armpmu) +{ + armpmu->free_irq(armpmu); +} + +static int +armpmu_reserve_hardware(struct arm_pmu *armpmu) +{ + int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); + if (err) { + armpmu_release_hardware(armpmu); + return err; + } + + return 0; +} + +static void +hw_perf_event_destroy(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + atomic_t *active_events = &armpmu->active_events; + struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; + + if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { + armpmu_release_hardware(armpmu); + mutex_unlock(pmu_reserve_mutex); + } +} + +static int +event_requires_mode_exclusion(struct perf_event_attr *attr) +{ + return attr->exclude_idle || attr->exclude_user || + attr->exclude_kernel || attr->exclude_hv; +} + +static int +__hw_perf_event_init(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int mapping; + + mapping = armpmu->map_event(event); + + if (mapping < 0) { + pr_debug("event %x:%llx not supported\n", event->attr.type, + event->attr.config); + return mapping; + } + + /* + * We don't assign an index until we actually place the event onto + * hardware. Use -1 to signify that we haven't decided where to put it + * yet. For SMP systems, each core has it's own PMU so we can't do any + * clever allocation or constraints checking at this point. + */ + hwc->idx = -1; + hwc->config_base = 0; + hwc->config = 0; + hwc->event_base = 0; + + /* + * Check whether we need to exclude the counter from certain modes. + */ + if ((!armpmu->set_event_filter || + armpmu->set_event_filter(hwc, &event->attr)) && + event_requires_mode_exclusion(&event->attr)) { + pr_debug("ARM performance counters do not support " + "mode exclusion\n"); + return -EOPNOTSUPP; + } + + /* + * Store the event encoding into the config_base field. + */ + hwc->config_base |= (unsigned long)mapping; + + if (!is_sampling_event(event)) { + /* + * For non-sampling runs, limit the sample_period to half + * of the counter width. That way, the new counter value + * is far less likely to overtake the previous one unless + * you have some serious IRQ latency issues. + */ + hwc->sample_period = armpmu->max_period >> 1; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + if (event->group_leader != event) { + if (validate_group(event) != 0) + return -EINVAL; + } + + return 0; +} + +static int armpmu_event_init(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + int err = 0; + atomic_t *active_events = &armpmu->active_events; + + /* + * Reject CPU-affine events for CPUs that are of a different class to + * that which this PMU handles. Process-following events (where + * event->cpu == -1) can be migrated between CPUs, and thus we have to + * reject them later (in armpmu_add) if they're scheduled on a + * different class of CPU. 
+ */ + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) + return -ENOENT; + + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + if (armpmu->map_event(event) == -ENOENT) + return -ENOENT; + + event->destroy = hw_perf_event_destroy; + + if (!atomic_inc_not_zero(active_events)) { + mutex_lock(&armpmu->reserve_mutex); + if (atomic_read(active_events) == 0) + err = armpmu_reserve_hardware(armpmu); + + if (!err) + atomic_inc(active_events); + mutex_unlock(&armpmu->reserve_mutex); + } + + if (err) + return err; + + err = __hw_perf_event_init(event); + if (err) + hw_perf_event_destroy(event); + + return err; +} + +static void armpmu_enable(struct pmu *pmu) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; + + if (enabled) + armpmu->start(armpmu); +} + +static void armpmu_disable(struct pmu *pmu) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu); + + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; + + armpmu->stop(armpmu); +} + +/* + * In heterogeneous systems, events are specific to a particular + * microarchitecture, and aren't suitable for another. Thus, only match CPUs of + * the same microarchitecture. + */ +static int armpmu_filter_match(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + unsigned int cpu = smp_processor_id(); + return cpumask_test_cpu(cpu, &armpmu->supported_cpus); +} + +static void armpmu_init(struct arm_pmu *armpmu) +{ + atomic_set(&armpmu->active_events, 0); + mutex_init(&armpmu->reserve_mutex); + + armpmu->pmu = (struct pmu) { + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, + .filter_match = armpmu_filter_match, + }; +} + +int armpmu_register(struct arm_pmu *armpmu, int type) +{ + armpmu_init(armpmu); + pr_info("enabled with %s PMU driver, %d counters available\n", + armpmu->name, armpmu->num_events); + return perf_pmu_register(&armpmu->pmu, armpmu->name, type); +} + +/* Set at runtime when we know what CPU type we are. */ +static struct arm_pmu *__oprofile_cpu_pmu; + +/* + * Despite the names, these two functions are CPU-specific and are used + * by the OProfile/perf code. 
+ */ +const char *perf_pmu_name(void) +{ + if (!__oprofile_cpu_pmu) + return NULL; + + return __oprofile_cpu_pmu->name; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + int max_events = 0; + + if (__oprofile_cpu_pmu != NULL) + max_events = __oprofile_cpu_pmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +static void cpu_pmu_enable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void cpu_pmu_disable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + disable_percpu_irq(irq); +} + +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) +{ + int i, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &hw_events->percpu_pmu); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + } + } +} + +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) +{ + int i, err, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; + } + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &hw_events->percpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + continue; + } + + err = request_irq(irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); + } + } + + return 0; +} + +/* + * PMU hardware loses all context when a CPU goes offline. + * When a CPU is hotplugged back in, since some hardware registers are + * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading + * junk values out of them. 
+ */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) + return NOTIFY_DONE; + + if (pmu->reset) + pmu->reset(pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cpu_pmu_init(struct arm_pmu *cpu_pmu) +{ + int err; + int cpu; + struct pmu_hw_events __percpu *cpu_hw_events; + + cpu_hw_events = alloc_percpu(struct pmu_hw_events); + if (!cpu_hw_events) + return -ENOMEM; + + cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; + err = register_cpu_notifier(&cpu_pmu->hotplug_nb); + if (err) + goto out_hw_events; + + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); + raw_spin_lock_init(&events->pmu_lock); + events->percpu_pmu = cpu_pmu; + } + + cpu_pmu->hw_events = cpu_hw_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, + cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + return 0; + +out_hw_events: + free_percpu(cpu_hw_events); + return err; +} + +static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) +{ + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); + free_percpu(cpu_pmu->hw_events); +} + +/* + * CPU PMU identification and probing. + */ +static int probe_current_pmu(struct arm_pmu *pmu, + const struct pmu_probe_info *info) +{ + int cpu = get_cpu(); + unsigned int cpuid = read_cpuid_id(); + int ret = -ENODEV; + + pr_info("probing PMU on CPU %d\n", cpu); + + for (; info->init != NULL; info++) { + if ((cpuid & info->mask) != info->cpuid) + continue; + ret = info->init(pmu); + break; + } + + put_cpu(); + return ret; +} + +static int of_pmu_irq_cfg(struct arm_pmu *pmu) +{ + int *irqs, i = 0; + bool using_spi = false; + struct platform_device *pdev = pmu->plat_device; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + + do { + struct device_node *dn; + int cpu, irq; + + /* See if we have an affinity entry */ + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i); + if (!dn) + break; + + /* Check the IRQ type and prohibit a mix of PPIs and SPIs */ + irq = platform_get_irq(pdev, i); + if (irq >= 0) { + bool spi = !irq_is_percpu(irq); + + if (i > 0 && spi != using_spi) { + pr_err("PPI/SPI IRQ type mismatch for %s!\n", + dn->name); + kfree(irqs); + return -EINVAL; + } + + using_spi = spi; + } + + /* Now look up the logical CPU number */ + for_each_possible_cpu(cpu) + if (dn == of_cpu_device_node_get(cpu)) + break; + + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + of_node_put(dn); + cpumask_setall(&pmu->supported_cpus); + break; + } + of_node_put(dn); + + /* For SPIs, we need to track the affinity per IRQ */ + if (using_spi) { + if (i >= pdev->num_resources) { + of_node_put(dn); + break; + } + + irqs[i] = cpu; + } + + /* Keep track of the CPUs containing this PMU type */ + cpumask_set_cpu(cpu, &pmu->supported_cpus); + of_node_put(dn); + i++; + } while (1); + + /* If we didn't manage to parse anything, claim to support all CPUs */ + if 
(cpumask_weight(&pmu->supported_cpus) == 0) + cpumask_setall(&pmu->supported_cpus); + + /* If we matched up the IRQ affinities, use them to route the SPIs */ + if (using_spi && i == pdev->num_resources) + pmu->irq_affinity = irqs; + else + kfree(irqs); + + return 0; +} + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table) +{ + const struct of_device_id *of_id; + const int (*init_fn)(struct arm_pmu *); + struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; + + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!\n"); + return -ENOMEM; + } + + if (!__oprofile_cpu_pmu) + __oprofile_cpu_pmu = pmu; + + pmu->plat_device = pdev; + + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { + init_fn = of_id->data; + + ret = of_pmu_irq_cfg(pmu); + if (!ret) + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu, probe_table); + cpumask_setall(&pmu->supported_cpus); + } + + if (ret) { + pr_info("failed to probe PMU!\n"); + goto out_free; + } + + ret = cpu_pmu_init(pmu); + if (ret) + goto out_free; + + ret = armpmu_register(pmu, -1); + if (ret) + goto out_destroy; + + return 0; + +out_destroy: + cpu_pmu_destroy(pmu); +out_free: + pr_info("failed to register PMU devices!\n"); + kfree(pmu); + return ret; +} diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h new file mode 100644 index 000000000000..bfa673bb822d --- /dev/null +++ b/include/linux/perf/arm_pmu.h @@ -0,0 +1,154 @@ +/* + * linux/arch/arm/include/asm/pmu.h + * + * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __ARM_PMU_H__ +#define __ARM_PMU_H__ + +#include +#include + +#include + +/* + * struct arm_pmu_platdata - ARM PMU platform data + * + * @handle_irq: an optional handler which will be called from the + * interrupt and passed the address of the low level handler, + * and can be used to implement any platform specific handling + * before or after calling it. + */ +struct arm_pmu_platdata { + irqreturn_t (*handle_irq)(int irq, void *dev, + irq_handler_t pmu_handler); +}; + +#ifdef CONFIG_ARM_PMU + +/* + * The ARMv7 CPU PMU supports up to 32 event counters. + */ +#define ARMPMU_MAX_HWEVENTS 32 + +#define HW_OP_UNSUPPORTED 0xFFFF +#define C(_x) PERF_COUNT_HW_CACHE_##_x +#define CACHE_OP_UNSUPPORTED 0xFFFF + +#define PERF_MAP_ALL_UNSUPPORTED \ + [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED + +#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ +[0 ... C(MAX) - 1] = { \ + [0 ... C(OP_MAX) - 1] = { \ + [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ + }, \ +} + +/* The events for a given PMU register set. */ +struct pmu_hw_events { + /* + * The events that are active on the PMU for the given index. + */ + struct perf_event *events[ARMPMU_MAX_HWEVENTS]; + + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS); + + /* + * Hardware lock to serialize accesses to PMU registers. Needed for the + * read/modify/write sequences. + */ + raw_spinlock_t pmu_lock; + + /* + * When using percpu IRQs, we need a percpu dev_id. Place it here as we + * already have to allocate this struct per cpu. 
+ */ + struct arm_pmu *percpu_pmu; +}; + +struct arm_pmu { + struct pmu pmu; + cpumask_t active_irqs; + cpumask_t supported_cpus; + int *irq_affinity; + char *name; + irqreturn_t (*handle_irq)(int irq_num, void *dev); + void (*enable)(struct perf_event *event); + void (*disable)(struct perf_event *event); + int (*get_event_idx)(struct pmu_hw_events *hw_events, + struct perf_event *event); + void (*clear_event_idx)(struct pmu_hw_events *hw_events, + struct perf_event *event); + int (*set_event_filter)(struct hw_perf_event *evt, + struct perf_event_attr *attr); + u32 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u32 val); + void (*start)(struct arm_pmu *); + void (*stop)(struct arm_pmu *); + void (*reset)(void *); + int (*request_irq)(struct arm_pmu *, irq_handler_t handler); + void (*free_irq)(struct arm_pmu *); + int (*map_event)(struct perf_event *event); + int num_events; + atomic_t active_events; + struct mutex reserve_mutex; + u64 max_period; + struct platform_device *plat_device; + struct pmu_hw_events __percpu *hw_events; + struct notifier_block hotplug_nb; +}; + +#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) + +int armpmu_register(struct arm_pmu *armpmu, int type); + +u64 armpmu_event_update(struct perf_event *event); + +int armpmu_event_set_period(struct perf_event *event); + +int armpmu_map_event(struct perf_event *event, + const unsigned (*event_map)[PERF_COUNT_HW_MAX], + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u32 raw_event_mask); + +struct pmu_probe_info { + unsigned int cpuid; + unsigned int mask; + int (*init)(struct arm_pmu *); +}; + +#define PMU_PROBE(_cpuid, _mask, _fn) \ +{ \ + .cpuid = (_cpuid), \ + .mask = (_mask), \ + .init = (_fn), \ +} + +#define ARM_PMU_PROBE(_cpuid, _fn) \ + PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn) + +#define ARM_PMU_XSCALE_MASK ((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK) + +#define XSCALE_PMU_PROBE(_version, _fn) \ + PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn) + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table); + +#endif /* CONFIG_ARM_PMU */ + +#endif /* __ARM_PMU_H__ */ -- cgit v1.2.3 From 787047eea24a2443c366679ae6b5a3873a33b64e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 29 Jul 2015 00:34:48 +0100 Subject: ARM: 8392/3: smp: Only expose /sys/.../cpuX/online if hotpluggable Writes to /sys/.../cpuX/online fail if we determine the platform doesn't support hotplug for that CPU. Furthermore, if the cpu_die op isn't specified the system hangs when we try to offline a CPU and it comes right back online unexpectedly. Let's figure this stuff out before we make the sysfs nodes so that the online file doesn't even exist if it isn't (at least sometimes) possible to hotplug the CPU. Add a new 'cpu_can_disable' op and repoint all 'cpu_disable' implementations at it because all implementers use the op to indicate if a CPU can be hotplugged or not in a static fashion. With PSCI we may need to add a 'cpu_disable' op so that the secure OS can be migrated off the CPU we're trying to hotplug. In this case, the 'cpu_can_disable' op will indicate that all CPUs are hotpluggable by returning true, but the 'cpu_disable' op will make a PSCI migration call and occasionally fail, denying the hotplug of a CPU. 
This shouldn't be any worse than x86 where we may indicate that all CPUs are hotpluggable but occasionally we can't offline a CPU due to check_irq_vectors_for_cpu_disable() failing to find a CPU to move vectors to. Cc: Mark Rutland Cc: Nicolas Pitre Cc: Dave Martin Acked-by: Simon Horman [shmobile portion] Tested-by: Simon Horman Cc: Magnus Damm Cc: Tested-by: Tyler Baker Cc: Geert Uytterhoeven Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/common/mcpm_platsmp.c | 12 ++++-------- arch/arm/include/asm/smp.h | 1 + arch/arm/include/asm/smp_plat.h | 9 +++++++++ arch/arm/kernel/setup.c | 2 +- arch/arm/kernel/smp.c | 15 ++++++++++++++- arch/arm/mach-shmobile/common.h | 2 +- arch/arm/mach-shmobile/platsmp.c | 4 ++-- arch/arm/mach-shmobile/smp-r8a7790.c | 2 +- arch/arm/mach-shmobile/smp-r8a7791.c | 2 +- arch/arm/mach-shmobile/smp-sh73a0.c | 2 +- 10 files changed, 35 insertions(+), 16 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c index 92e54d7c6f46..2b25b6038f66 100644 --- a/arch/arm/common/mcpm_platsmp.c +++ b/arch/arm/common/mcpm_platsmp.c @@ -65,14 +65,10 @@ static int mcpm_cpu_kill(unsigned int cpu) return !mcpm_wait_for_cpu_powerdown(pcpu, pcluster); } -static int mcpm_cpu_disable(unsigned int cpu) +static bool mcpm_cpu_can_disable(unsigned int cpu) { - /* - * We assume all CPUs may be shut down. - * This would be the hook to use for eventual Secure - * OS migration requests as described in the PSCI spec. - */ - return 0; + /* We assume all CPUs may be shut down. */ + return true; } static void mcpm_cpu_die(unsigned int cpu) @@ -92,7 +88,7 @@ static struct smp_operations __initdata mcpm_smp_ops = { .smp_secondary_init = mcpm_secondary_init, #ifdef CONFIG_HOTPLUG_CPU .cpu_kill = mcpm_cpu_kill, - .cpu_disable = mcpm_cpu_disable, + .cpu_can_disable = mcpm_cpu_can_disable, .cpu_die = mcpm_cpu_die, #endif }; diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 2f3ac1ba6fb4..318ce89eeff7 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -105,6 +105,7 @@ struct smp_operations { #ifdef CONFIG_HOTPLUG_CPU int (*cpu_kill)(unsigned int cpu); void (*cpu_die)(unsigned int cpu); + bool (*cpu_can_disable)(unsigned int cpu); int (*cpu_disable)(unsigned int cpu); #endif #endif diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 993e5224d8f7..f9080717fc88 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -107,4 +107,13 @@ static inline u32 mpidr_hash_size(void) extern int platform_can_secondary_boot(void); extern int platform_can_cpu_hotplug(void); +#ifdef CONFIG_HOTPLUG_CPU +extern int platform_can_hotplug_cpu(unsigned int cpu); +#else +static inline int platform_can_hotplug_cpu(unsigned int cpu) +{ + return 0; +} +#endif + #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 36c18b73c1f4..6bbec6042052 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1015,7 +1015,7 @@ static int __init topology_init(void) for_each_possible_cpu(cpu) { struct cpuinfo_arm *cpuinfo = &per_cpu(cpu_data, cpu); - cpuinfo->cpu.hotpluggable = 1; + cpuinfo->cpu.hotpluggable = platform_can_hotplug_cpu(cpu); register_cpu(&cpuinfo->cpu, cpu); } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 90dfbedfbfb8..3cd846f48eaf 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -175,13 +175,26 @@ static int platform_cpu_disable(unsigned int cpu) if (smp_ops.cpu_disable) 
return smp_ops.cpu_disable(cpu); + return 0; +} + +int platform_can_hotplug_cpu(unsigned int cpu) +{ + /* cpu_die must be specified to support hotplug */ + if (!smp_ops.cpu_die) + return 0; + + if (smp_ops.cpu_can_disable) + return smp_ops.cpu_can_disable(cpu); + /* * By default, allow disabling all CPUs except the first one, * since this is special on a lot of platforms, e.g. because * of clock tick interrupts. */ - return cpu == 0 ? -EPERM : 0; + return cpu != 0; } + /* * __cpu_disable runs on the processor to be shutdown. */ diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h index 476092b86c6e..8d27ec546a35 100644 --- a/arch/arm/mach-shmobile/common.h +++ b/arch/arm/mach-shmobile/common.h @@ -13,7 +13,7 @@ extern void shmobile_smp_boot(void); extern void shmobile_smp_sleep(void); extern void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg); -extern int shmobile_smp_cpu_disable(unsigned int cpu); +extern bool shmobile_smp_cpu_can_disable(unsigned int cpu); extern void shmobile_boot_scu(void); extern void shmobile_smp_scu_prepare_cpus(unsigned int max_cpus); extern void shmobile_smp_scu_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c index 3923e09e966d..b23378f3d7e1 100644 --- a/arch/arm/mach-shmobile/platsmp.c +++ b/arch/arm/mach-shmobile/platsmp.c @@ -31,8 +31,8 @@ void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg) } #ifdef CONFIG_HOTPLUG_CPU -int shmobile_smp_cpu_disable(unsigned int cpu) +bool shmobile_smp_cpu_can_disable(unsigned int cpu) { - return 0; /* Hotplug of any CPU is supported */ + return true; /* Hotplug of any CPU is supported */ } #endif diff --git a/arch/arm/mach-shmobile/smp-r8a7790.c b/arch/arm/mach-shmobile/smp-r8a7790.c index 930f45cbc08a..947e437cab68 100644 --- a/arch/arm/mach-shmobile/smp-r8a7790.c +++ b/arch/arm/mach-shmobile/smp-r8a7790.c @@ -64,7 +64,7 @@ struct smp_operations r8a7790_smp_ops __initdata = { .smp_prepare_cpus = r8a7790_smp_prepare_cpus, .smp_boot_secondary = shmobile_smp_apmu_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_apmu_cpu_die, .cpu_kill = shmobile_smp_apmu_cpu_kill, #endif diff --git a/arch/arm/mach-shmobile/smp-r8a7791.c b/arch/arm/mach-shmobile/smp-r8a7791.c index 5e2d1db79afa..b2508c0d276b 100644 --- a/arch/arm/mach-shmobile/smp-r8a7791.c +++ b/arch/arm/mach-shmobile/smp-r8a7791.c @@ -58,7 +58,7 @@ struct smp_operations r8a7791_smp_ops __initdata = { .smp_prepare_cpus = r8a7791_smp_prepare_cpus, .smp_boot_secondary = r8a7791_smp_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_apmu_cpu_die, .cpu_kill = shmobile_smp_apmu_cpu_kill, #endif diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c index 2106d6b76a06..ae7c764fd6b4 100644 --- a/arch/arm/mach-shmobile/smp-sh73a0.c +++ b/arch/arm/mach-shmobile/smp-sh73a0.c @@ -68,7 +68,7 @@ struct smp_operations sh73a0_smp_ops __initdata = { .smp_prepare_cpus = sh73a0_smp_prepare_cpus, .smp_boot_secondary = sh73a0_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_scu_cpu_die, .cpu_kill = shmobile_smp_scu_cpu_kill, #endif -- cgit v1.2.3 From 9ac87c5a0b19417f925dc61cac7198d872159a2c Mon Sep 17 
00:00:00 2001 From: Will Deacon Date: Fri, 31 Jul 2015 11:28:54 +0100 Subject: ARM: 8407/1: switch_to: Remove finish_arch_switch Fold finish_arch_switch() into switch_to(), in preparation for the removal of the finish_arch_switch call from core sched code. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Russell King --- arch/arm/include/asm/switch_to.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index c99e259469f7..12ebfcc1d539 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -10,7 +10,9 @@ * CPU. */ #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) -#define finish_arch_switch(prev) dsb(ish) +#define __complete_pending_tlbi() dsb(ish) +#else +#define __complete_pending_tlbi() #endif /* @@ -22,6 +24,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info #define switch_to(prev,next,last) \ do { \ + __complete_pending_tlbi(); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) -- cgit v1.2.3 From 1234e3fda9aa24b2d650bbcd9ef09d5f6a12dc86 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 24 Jul 2015 09:10:55 +0100 Subject: ARM: reduce visibility of dmac_* functions The dmac_* functions are private to the ARM DMA API implementation, and should not be used by drivers. In order to discourage their use, remove their prototypes and macros from asm/*.h. We have to leave dmac_flush_range() behind as Exynos and MSM IOMMU code use these; once these sites are fixed, this can be moved also. Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 4 ---- arch/arm/include/asm/glue-cache.h | 2 -- arch/arm/mm/dma-mapping.c | 1 + arch/arm/mm/dma.h | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 6 deletions(-) create mode 100644 arch/arm/mm/dma.h (limited to 'arch/arm') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 4812cda8fd17..c5230a44eeca 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -140,8 +140,6 @@ extern struct cpu_cache_fns cpu_cache; * is visible to DMA, or data written by DMA to system memory is * visible to the CPU. */ -#define dmac_map_area cpu_cache.dma_map_area -#define dmac_unmap_area cpu_cache.dma_unmap_area #define dmac_flush_range cpu_cache.dma_flush_range #else @@ -161,8 +159,6 @@ extern void __cpuc_flush_dcache_area(void *, size_t); * is visible to DMA, or data written by DMA to system memory is * visible to the CPU. 
*/ -extern void dmac_map_area(const void *, size_t, int); -extern void dmac_unmap_area(const void *, size_t, int); extern void dmac_flush_range(const void *, const void *); #endif diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index a3c24cd5b7c8..cab07f69382d 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -158,8 +158,6 @@ static inline void nop_dma_unmap_area(const void *s, size_t l, int f) { } #define __cpuc_coherent_user_range __glue(_CACHE,_coherent_user_range) #define __cpuc_flush_dcache_area __glue(_CACHE,_flush_kern_dcache_area) -#define dmac_map_area __glue(_CACHE,_dma_map_area) -#define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) #define dmac_flush_range __glue(_CACHE,_dma_flush_range) #endif diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1ced8a0f7a52..5edf17cf043d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -39,6 +39,7 @@ #include #include +#include "dma.h" #include "mm.h" /* diff --git a/arch/arm/mm/dma.h b/arch/arm/mm/dma.h new file mode 100644 index 000000000000..70ea6852f94e --- /dev/null +++ b/arch/arm/mm/dma.h @@ -0,0 +1,32 @@ +#ifndef DMA_H +#define DMA_H + +#include + +#ifndef MULTI_CACHE +#define dmac_map_area __glue(_CACHE,_dma_map_area) +#define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +extern void dmac_map_area(const void *, size_t, int); +extern void dmac_unmap_area(const void *, size_t, int); + +#else + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +#define dmac_map_area cpu_cache.dma_map_area +#define dmac_unmap_area cpu_cache.dma_unmap_area + +#endif + +#endif -- cgit v1.2.3 From 76695af20c015206cffb84b15912be6797d0cca2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Sun, 2 Aug 2015 17:11:04 +0200 Subject: locking, arch: use WRITE_ONCE()/READ_ONCE() in smp_store_release()/smp_load_acquire() Replace ACCESS_ONCE() macro in smp_store_release() and smp_load_acquire() with WRITE_ONCE() and READ_ONCE() on x86, arm, arm64, ia64, metag, mips, powerpc, s390, sparc and asm-generic since ACCESS_ONCE() does not work reliably on non-scalar types. WRITE_ONCE() and READ_ONCE() were introduced in the following commits: 230fa253df63 ("kernel: Provide READ_ONCE and ASSIGN_ONCE") 43239cbe79fc ("kernel: Change ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)") Signed-off-by: Andrey Konovalov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Davidlohr Bueso Acked-by: Michael Ellerman (powerpc) Acked-by: Ralf Baechle Cc: Alexander Duyck Cc: Andre Przywara Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: David S. Miller Cc: Davidlohr Bueso Cc: Dmitry Vyukov Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Heiko Carstens Cc: James Hogan Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Paul E. 
McKenney Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Tony Luck Cc: Will Deacon Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/1438528264-714-1-git-send-email-andreyknvl@google.com Signed-off-by: Ingo Molnar --- arch/arm/include/asm/barrier.h | 4 ++-- arch/arm64/include/asm/barrier.h | 4 ++-- arch/ia64/include/asm/barrier.h | 4 ++-- arch/metag/include/asm/barrier.h | 4 ++-- arch/mips/include/asm/barrier.h | 4 ++-- arch/powerpc/include/asm/barrier.h | 4 ++-- arch/s390/include/asm/barrier.h | 4 ++-- arch/sparc/include/asm/barrier_64.h | 4 ++-- arch/x86/include/asm/barrier.h | 8 ++++---- include/asm-generic/barrier.h | 4 ++-- 10 files changed, 22 insertions(+), 22 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 6c2327e1c732..70393574e0fa 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -67,12 +67,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 0fa47c4275cb..ef93b20bc964 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -44,12 +44,12 @@ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index 843ba435e43b..df896a1c41d3 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -66,12 +66,12 @@ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index 5a696e507930..172b7e5efc53 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -90,12 +90,12 @@ static inline void fence(void) do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 7ecba84656d4..752e0b86c171 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -133,12 +133,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 51ccc7232042..0eca6efc0631 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -76,12 +76,12 @@ 
do { \ compiletime_assert_atomic_type(*p); \ smp_lwsync(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_lwsync(); \ ___p1; \ diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index e6f8615a11eb..d48fe0162331 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -42,12 +42,12 @@ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 809941e33e12..14a928601657 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -60,12 +60,12 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index e51a8f803f55..d2bcfbed11c3 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -57,12 +57,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ @@ -74,12 +74,12 @@ do { \ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 55e3abc2d027..b42afada1280 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -108,12 +108,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ -- cgit v1.2.3 From 76b235c6bcb16062d663e2ee96db0b69f2e6bc14 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jul 2015 14:45:44 +0200 Subject: jump_label: Rename JUMP_LABEL_{EN,DIS}ABLE to JUMP_LABEL_{JMP,NOP} Since we've already stepped away from ENABLE is a JMP and DISABLE is a NOP with the branch_default bits, and are going to make it even worse, rename it to make it all clearer. This way we don't mix multiple levels of logic attributes, but have a plain 'physical' name for what the current instruction patching status of a jump label is. This is a first step in removing the naming confusion that has led to a stream of avoidable bugs such as: a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()") Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org [ Beefed up the changelog. ] Signed-off-by: Ingo Molnar --- arch/arm/kernel/jump_label.c | 2 +- arch/arm64/kernel/jump_label.c | 2 +- arch/mips/kernel/jump_label.c | 2 +- arch/powerpc/kernel/jump_label.c | 2 +- arch/s390/kernel/jump_label.c | 2 +- arch/sparc/kernel/jump_label.c | 2 +- arch/x86/kernel/jump_label.c | 2 +- include/linux/jump_label.h | 4 ++-- kernel/jump_label.c | 18 +++++++++--------- 9 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c index e39cbf488cfe..845a5dd9c42b 100644 --- a/arch/arm/kernel/jump_label.c +++ b/arch/arm/kernel/jump_label.c @@ -12,7 +12,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry, void *addr = (void *)entry->code; unsigned int insn; - if (type == JUMP_LABEL_ENABLE) + if (type == JUMP_LABEL_JMP) insn = arm_gen_branch(entry->code, entry->target); else insn = arm_gen_nop(); diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index 4f1fec7a46db..c2dd1ad3e648 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -28,7 +28,7 @@ void arch_jump_label_transform(struct jump_entry *entry, void *addr = (void *)entry->code; u32 insn; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { insn = aarch64_insn_gen_branch_imm(entry->code, entry->target, AARCH64_INSN_BRANCH_NOLINK); diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index dda800e9e731..3e586daa3a32 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -51,7 +51,7 @@ void arch_jump_label_transform(struct jump_entry *e, /* Target must have the right alignment and ISA must be preserved. */ BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT); - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { insn.j_format.opcode = J_ISA_BIT ? 
mm_j32_op : j_op; insn.j_format.target = e->target >> J_RANGE_SHIFT; } else { diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index a1ed8a8c7cb4..6472472093d0 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -17,7 +17,7 @@ void arch_jump_label_transform(struct jump_entry *entry, { u32 *addr = (u32 *)(unsigned long)entry->code; - if (type == JUMP_LABEL_ENABLE) + if (type == JUMP_LABEL_JMP) patch_branch(addr, entry->target, 0); else patch_instruction(addr, PPC_INST_NOP); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index a90299600483..a83d2248fea9 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -64,7 +64,7 @@ static void __jump_label_transform(struct jump_entry *entry, { struct insn old, new; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { jump_label_make_nop(entry, &old); jump_label_make_branch(entry, &new); } else { diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c index 48565c11e82a..59bbeff55024 100644 --- a/arch/sparc/kernel/jump_label.c +++ b/arch/sparc/kernel/jump_label.c @@ -16,7 +16,7 @@ void arch_jump_label_transform(struct jump_entry *entry, u32 val; u32 *insn = (u32 *) (unsigned long) entry->code; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { s32 off = (s32)entry->target - (s32)entry->code; #ifdef CONFIG_SPARC64 diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 26d5a55a2736..e565e0e4d216 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -45,7 +45,7 @@ static void __jump_label_transform(struct jump_entry *entry, const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { if (init) { /* * Jump label is enabled for the first time. 
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index f4de473f226b..6a8b4fe10ad8 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -86,8 +86,8 @@ struct static_key { #ifndef __ASSEMBLY__ enum jump_label_type { - JUMP_LABEL_DISABLE = 0, - JUMP_LABEL_ENABLE, + JUMP_LABEL_NOP = 0, + JUMP_LABEL_JMP, }; struct module; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 52ebaca1b9fc..96d8945c8bf3 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -65,9 +65,9 @@ void static_key_slow_inc(struct static_key *key) jump_label_lock(); if (atomic_read(&key->enabled) == 0) { if (!jump_label_get_branch_default(key)) - jump_label_update(key, JUMP_LABEL_ENABLE); + jump_label_update(key, JUMP_LABEL_JMP); else - jump_label_update(key, JUMP_LABEL_DISABLE); + jump_label_update(key, JUMP_LABEL_NOP); } atomic_inc(&key->enabled); jump_label_unlock(); @@ -88,9 +88,9 @@ static void __static_key_slow_dec(struct static_key *key, schedule_delayed_work(work, rate_limit); } else { if (!jump_label_get_branch_default(key)) - jump_label_update(key, JUMP_LABEL_DISABLE); + jump_label_update(key, JUMP_LABEL_NOP); else - jump_label_update(key, JUMP_LABEL_ENABLE); + jump_label_update(key, JUMP_LABEL_JMP); } jump_label_unlock(); } @@ -184,9 +184,9 @@ static enum jump_label_type jump_label_type(struct static_key *key) bool state = static_key_enabled(key); if ((!true_branch && state) || (true_branch && !state)) - return JUMP_LABEL_ENABLE; + return JUMP_LABEL_JMP; - return JUMP_LABEL_DISABLE; + return JUMP_LABEL_NOP; } void __init jump_label_init(void) @@ -276,7 +276,7 @@ void jump_label_apply_nops(struct module *mod) return; for (iter = iter_start; iter < iter_stop; iter++) { - arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); + arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); } } @@ -318,8 +318,8 @@ static int jump_label_add_module(struct module *mod) jlm->next = key->next; key->next = jlm; - if (jump_label_type(key) == JUMP_LABEL_ENABLE) - __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); + if (jump_label_type(key) == JUMP_LABEL_JMP) + __jump_label_update(key, iter, iter_stop, JUMP_LABEL_JMP); } return 0; -- cgit v1.2.3 From 11276d5306b8e5b438a36bbff855fe792d7eaa61 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jul 2015 15:09:55 +0200 Subject: locking/static_keys: Add a new static_key interface There are various problems and short-comings with the current static_key interface: - static_key_{true,false}() read like a branch depending on the key value, instead of the actual likely/unlikely branch depending on init value. - static_key_{true,false}() are, as stated above, tied to the static_key init values STATIC_KEY_INIT_{TRUE,FALSE}. - we're limited to the 2 (out of 4) possible options that compile to a default NOP because that's what our arch_static_branch() assembly emits. So provide a new static_key interface: DEFINE_STATIC_KEY_TRUE(name); DEFINE_STATIC_KEY_FALSE(name); Which define a key of different types with an initial true/false value. Then allow: static_branch_likely() static_branch_unlikely() to take a key of either type and emit the right instruction for the case. This means adding a second arch_static_branch_jump() assembly helper which emits a JMP per default. In order to determine the right instruction for the right state, encode the branch type in the LSB of jump_entry::key. 
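[ For illustration only: a minimal consumer of the new interface, assuming this patch is applied. The key name use_fast_path and the fast_path()/slow_path() helpers are invented for the example; DEFINE_STATIC_KEY_TRUE(), static_branch_likely() and static_branch_disable() are the macros introduced here. ]

    #include <linux/jump_label.h>

    extern void fast_path(void);   /* placeholder helpers for the example */
    extern void slow_path(void);

    /* Initially-true key: the fast path is the expected (likely) case. */
    static DEFINE_STATIC_KEY_TRUE(use_fast_path);

    void do_work(void)
    {
            if (static_branch_likely(&use_fast_path))
                    fast_path();    /* hot path: emitted as a NOP where jump labels are available */
            else
                    slow_path();    /* out-of-line branch */
    }

    /* Runtime switch, e.g. from a debugfs write or module parameter. */
    void disable_fast_path(void)
    {
            static_branch_disable(&use_fast_path);
    }
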
This is the final step in removing the naming confusion that has led to a stream of avoidable bugs such as: a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()") ... but it also allows new static key combinations that will give us performance enhancements in the subsequent patches. Tested-by: Rabin Vincent # arm Signed-off-by: Peter Zijlstra (Intel) Acked-by: Michael Ellerman # ppc Acked-by: Heiko Carstens # s390 Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/jump_label.h | 25 ++++-- arch/arm64/include/asm/jump_label.h | 18 +++- arch/mips/include/asm/jump_label.h | 19 ++++- arch/powerpc/include/asm/jump_label.h | 19 ++++- arch/s390/include/asm/jump_label.h | 19 ++++- arch/sparc/include/asm/jump_label.h | 35 ++++++-- arch/x86/include/asm/jump_label.h | 21 ++++- include/linux/jump_label.h | 149 +++++++++++++++++++++++++++++++--- kernel/jump_label.c | 37 +++++++-- 9 files changed, 298 insertions(+), 44 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index 5f337dc5c108..34f7b6980d21 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -4,23 +4,32 @@ #ifndef __ASSEMBLY__ #include +#include #define JUMP_LABEL_NOP_SIZE 4 -#ifdef CONFIG_THUMB2_KERNEL -#define JUMP_LABEL_NOP "nop.w" -#else -#define JUMP_LABEL_NOP "nop" -#endif +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + WASM(nop) "\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { asm_volatile_goto("1:\n\t" - JUMP_LABEL_NOP "\n\t" + WASM(b) " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); return false; l_yes: diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index c0e5165c2f76..1b5e0e843c3a 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -26,14 +26,28 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm goto("1: nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" - : : "i"(key) : : l_yes); + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm goto("1: b %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 3\n\t" + ".quad 1b, %l[l_yes], %c0\n\t" + ".popsection\n\t" + : : "i"(&((char *)key)[branch]) : : l_yes); return false; l_yes: diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 608aa57799c8..e77672539e8e 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -26,14 +26,29 @@ #define NOP_INSN "nop" #endif -static __always_inline bool arch_static_branch(struct static_key *key) 
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:\t" NOP_INSN "\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\tj %l[l_yes]\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + WORD_INSN " 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index efbf9a322a23..47e155f15433 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -18,14 +18,29 @@ #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" ".popsection \n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + "b %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 69972b7957ee..7f9fd5e3f1bf 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -12,14 +12,29 @@ * We use a brcl 0,2 instruction for jump labels at compile time so it * can be easily distinguished from a hotpatch generated instruction. 
*/ -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n" ".pushsection __jump_table, \"aw\"\n" ".balign 8\n" ".quad 0b, %l[label], %0\n" ".popsection\n" - : : "X" (key) : : label); + : : "X" (&((char *)key)[branch]) : : label); + + return false; +label: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("0: brcl 15, %l[label]\n" + ".pushsection __jump_table, \"aw\"\n" + ".balign 8\n" + ".quad 0b, %l[label], %0\n" + ".popsection\n" + : : "X" (&((char *)key)[branch]) : : label); + return false; label: return true; diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index cc9b04a2b11b..62d0354d1727 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,16 +7,33 @@ #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" - "nop\n\t" - "nop\n\t" - ".pushsection __jump_table, \"aw\"\n\t" - ".align 4\n\t" - ".word 1b, %l[l_yes], %c0\n\t" - ".popsection \n\t" - : : "i" (key) : : l_yes); + asm_volatile_goto("1:\n\t" + "nop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + "b %l[l_yes]\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a4c1cf7e93f8..28d7a857f9d1 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -16,7 +16,7 @@ # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC #endif -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" @@ -24,7 +24,24 @@ static __always_inline bool arch_static_branch(struct static_key *key) _ASM_ALIGN "\n\t" _ASM_PTR "1b, %l[l_yes], %c0 \n\t" ".popsection \n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:" + ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" + "2:\n\t" + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_ALIGN "\n\t" + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 65f0ebac63cf..e337a1961933 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -107,12 +107,12 @@ static inline int static_key_count(struct static_key *key) static __always_inline bool static_key_false(struct static_key *key) { - return arch_static_branch(key); + return 
arch_static_branch(key, false); } static __always_inline bool static_key_true(struct static_key *key) { - return !static_key_false(key); + return !arch_static_branch(key, true); } extern struct jump_entry __start___jump_table[]; @@ -130,12 +130,12 @@ extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); -#define STATIC_KEY_INIT_TRUE ((struct static_key) \ +#define STATIC_KEY_INIT_TRUE \ { .enabled = ATOMIC_INIT(1), \ - .entries = (void *)JUMP_TYPE_TRUE }) -#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + .entries = (void *)JUMP_TYPE_TRUE } +#define STATIC_KEY_INIT_FALSE \ { .enabled = ATOMIC_INIT(0), \ - .entries = (void *)JUMP_TYPE_FALSE }) + .entries = (void *)JUMP_TYPE_FALSE } #else /* !HAVE_JUMP_LABEL */ @@ -183,10 +183,8 @@ static inline int jump_label_apply_nops(struct module *mod) return 0; } -#define STATIC_KEY_INIT_TRUE ((struct static_key) \ - { .enabled = ATOMIC_INIT(1) }) -#define STATIC_KEY_INIT_FALSE ((struct static_key) \ - { .enabled = ATOMIC_INIT(0) }) +#define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } +#define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } #endif /* HAVE_JUMP_LABEL */ @@ -218,6 +216,137 @@ static inline void static_key_disable(struct static_key *key) static_key_slow_dec(key); } +/* -------------------------------------------------------------------------- */ + +/* + * Two type wrappers around static_key, such that we can use compile time + * type differentiation to emit the right code. + * + * All the below code is macros in order to play type games. + */ + +struct static_key_true { + struct static_key key; +}; + +struct static_key_false { + struct static_key key; +}; + +#define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, } +#define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, } + +#define DEFINE_STATIC_KEY_TRUE(name) \ + struct static_key_true name = STATIC_KEY_TRUE_INIT + +#define DEFINE_STATIC_KEY_FALSE(name) \ + struct static_key_false name = STATIC_KEY_FALSE_INIT + +#ifdef HAVE_JUMP_LABEL + +/* + * Combine the right initial value (type) with the right branch order + * to generate the desired result. + * + * + * type\branch| likely (1) | unlikely (0) + * -----------+-----------------------+------------------ + * | | + * true (1) | ... | ... + * | NOP | JMP L + * | | 1: ... + * | L: ... | + * | | + * | | L: + * | | jmp 1b + * | | + * -----------+-----------------------+------------------ + * | | + * false (0) | ... | ... + * | JMP L | NOP + * | | 1: ... + * | L: ... | + * | | + * | | L: + * | | jmp 1b + * | | + * -----------+-----------------------+------------------ + * + * The initial value is encoded in the LSB of static_key::entries, + * type: 0 = false, 1 = true. + * + * The branch type is encoded in the LSB of jump_entry::key, + * branch: 0 = unlikely, 1 = likely. + * + * This gives the following logic table: + * + * enabled type branch instuction + * -----------------------------+----------- + * 0 0 0 | NOP + * 0 0 1 | JMP + * 0 1 0 | NOP + * 0 1 1 | JMP + * + * 1 0 0 | JMP + * 1 0 1 | NOP + * 1 1 0 | JMP + * 1 1 1 | NOP + * + * Which gives the following functions: + * + * dynamic: instruction = enabled ^ branch + * static: instruction = type ^ branch + * + * See jump_label_type() / jump_label_init_type(). 
+ */ + +extern bool ____wrong_branch_error(void); + +#define static_branch_likely(x) \ +({ \ + bool branch; \ + if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ + branch = !arch_static_branch(&(x)->key, true); \ + else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ + branch = !arch_static_branch_jump(&(x)->key, true); \ + else \ + branch = ____wrong_branch_error(); \ + branch; \ +}) + +#define static_branch_unlikely(x) \ +({ \ + bool branch; \ + if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ + branch = arch_static_branch_jump(&(x)->key, false); \ + else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ + branch = arch_static_branch(&(x)->key, false); \ + else \ + branch = ____wrong_branch_error(); \ + branch; \ +}) + +#else /* !HAVE_JUMP_LABEL */ + +#define static_branch_likely(x) likely(static_key_enabled(&(x)->key)) +#define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key)) + +#endif /* HAVE_JUMP_LABEL */ + +/* + * Advanced usage; refcount, branch is enabled when: count != 0 + */ + +#define static_branch_inc(x) static_key_slow_inc(&(x)->key) +#define static_branch_dec(x) static_key_slow_dec(&(x)->key) + +/* + * Normal usage; boolean enable/disable. + */ + +#define static_branch_enable(x) static_key_enable(&(x)->key) +#define static_branch_disable(x) static_key_disable(&(x)->key) + #endif /* _LINUX_JUMP_LABEL_H */ #endif /* __ASSEMBLY__ */ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 2e7cc1e4b4b5..8fd00d892286 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -165,16 +165,22 @@ static inline bool static_key_type(struct static_key *key) static inline struct static_key *jump_entry_key(struct jump_entry *entry) { - return (struct static_key *)((unsigned long)entry->key); + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static bool jump_entry_branch(struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; } static enum jump_label_type jump_label_type(struct jump_entry *entry) { struct static_key *key = jump_entry_key(entry); bool enabled = static_key_enabled(key); - bool type = static_key_type(key); + bool branch = jump_entry_branch(entry); - return enabled ^ type; + /* See the comment in linux/jump_label.h */ + return enabled ^ branch; } static void __jump_label_update(struct static_key *key, @@ -205,7 +211,10 @@ void __init jump_label_init(void) for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; - arch_jump_label_transform_static(iter, jump_label_type(iter)); + /* rewrite NOPs */ + if (jump_label_type(iter) == JUMP_LABEL_NOP) + arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); + iterk = jump_entry_key(iter); if (iterk == key) continue; @@ -225,6 +234,16 @@ void __init jump_label_init(void) #ifdef CONFIG_MODULES +static enum jump_label_type jump_label_init_type(struct jump_entry *entry) +{ + struct static_key *key = jump_entry_key(entry); + bool type = static_key_type(key); + bool branch = jump_entry_branch(entry); + + /* See the comment in linux/jump_label.h */ + return type ^ branch; +} + struct static_key_mod { struct static_key_mod *next; struct jump_entry *entries; @@ -276,8 +295,11 @@ void jump_label_apply_nops(struct module *mod) if (iter_start == iter_stop) return; - for (iter = iter_start; iter < iter_stop; iter++) - arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); + for (iter = iter_start; iter < iter_stop; iter++) { + /* Only write NOPs for arch_branch_static(). 
*/ + if (jump_label_init_type(iter) == JUMP_LABEL_NOP) + arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); + } } static int jump_label_add_module(struct module *mod) @@ -318,7 +340,8 @@ static int jump_label_add_module(struct module *mod) jlm->next = key->next; key->next = jlm; - if (jump_label_type(iter) == JUMP_LABEL_JMP) + /* Only update if we've changed from our initial state */ + if (jump_label_type(iter) != jump_label_init_type(iter)) __jump_label_update(key, iter, iter_stop); } -- cgit v1.2.3 From 37cf524f9360f9165d67459b7bf795c01824df98 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 31 Jul 2015 15:46:18 +0100 Subject: ARM: psci: boot_secondary: replace __pa with virt_to_idmap On some PAE systems (e.g. TI Keystone), memory is above the 32-bit addressable limit, and the interconnect provides an aliased view of parts of physical memory in the 32-bit addressable space. This alias is strictly for boot time usage, and is not otherwise usable because of coherency limitations. In this case, virt_to_phys(secondary_startup) would return the physical address of the secondary CPU boot entry point, but on such systems, this would be above the 4GB limit. A separate function, virt_to_idmap(), has been provided to return a usable physical address for functions in the identity mapping, and this must be used in preference to virt_to_phys() or __pa() to find the physical entry point for functions in the identity mapping range. For other systems, virt_to_idmap() and virt_to_phys() return identical physical addresses. Acked-by: Santosh Shilimkar Acked-by: Nicolas Pitre Tested-by Vitaly Andrianov Signed-off-by: Grygorii Strashko [Mark: apply rmk's suggested rewording] Signed-off-by: Mark Rutland Cc: Russell King Signed-off-by: Will Deacon --- arch/arm/kernel/psci_smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 28a1db4da704..244aaddfbfda 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -51,7 +51,7 @@ static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle) { if (psci_ops.cpu_on) return psci_ops.cpu_on(cpu_logical_map(cpu), - __pa(secondary_startup)); + virt_to_idmap(&secondary_startup)); return -ENODEV; } -- cgit v1.2.3 From be120397e7709d9d5ed88317a385ce864a2603bc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 31 Jul 2015 15:46:19 +0100 Subject: ARM: migrate to common PSCI client code Now that the common PSCI client code has been factored out to drivers/firmware, and made safe for 32-bit use, move the 32-bit ARM code over to it. This results in a moderate reduction of duplicated lines, and will prevent further duplication as the PSCI client code is updated for PSCI 1.0 and beyond. The two legacy platform users of the PSCI invocation code are updated to account for interface changes. In both cases the power state parameter (which is constant) is now generated using macros, so that the pack/unpack logic can be killed in preparation for PSCI 1.0 power state changes. 
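[ For illustration only: with the pack/unpack helpers gone, a constant power state parameter is built directly from the PSCI 0.2 bit-field macros, mirroring the HIGHBANK_SUSPEND_PARAM and CALXEDA_IDLE_PARAM definitions in the diff below. The EXAMPLE_* names are invented for this sketch, and include lines are omitted because header paths are stripped in this log. ]

    /* Example only: power-down request, affinity level 1, state id 0. */
    #define EXAMPLE_SUSPEND_PARAM \
            ((0 << PSCI_0_2_POWER_STATE_ID_SHIFT) | \
             (1 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) | \
             (PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT))

    static int example_suspend_finish(unsigned long val)
    {
            /* cpu_suspend now takes the packed u32 directly, not a struct psci_power_state */
            return psci_ops.cpu_suspend(EXAMPLE_SUSPEND_PARAM, __pa(cpu_resume));
    }
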
Signed-off-by: Mark Rutland Acked-by: Rob Herring Cc: Catalin Marinas Cc: Ashwin Chaugule Cc: Lorenzo Pieralisi Cc: Russell King Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm/Kconfig | 1 + arch/arm/include/asm/psci.h | 23 --- arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/psci.c | 299 -------------------------------------- arch/arm/kernel/psci_smp.c | 29 +++- arch/arm/kernel/setup.c | 3 +- arch/arm/mach-highbank/highbank.c | 2 +- arch/arm/mach-highbank/pm.c | 16 +- drivers/cpuidle/cpuidle-calxeda.c | 15 +- 9 files changed, 46 insertions(+), 344 deletions(-) delete mode 100644 arch/arm/kernel/psci.c (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1c5021002fe4..f5cd68425d8d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1496,6 +1496,7 @@ config HOTPLUG_CPU config ARM_PSCI bool "Support for the ARM Power State Coordination Interface (PSCI)" depends on CPU_V7 + select ARM_PSCI_FW help Say Y here if you want Linux to communicate with system firmware implementing the PSCI specification for CPU-centric power diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index c25ef3ec6d1f..68ee3ce17b82 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -14,34 +14,11 @@ #ifndef __ASM_ARM_PSCI_H #define __ASM_ARM_PSCI_H -#define PSCI_POWER_STATE_TYPE_STANDBY 0 -#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 - -struct psci_power_state { - u16 id; - u8 type; - u8 affinity_level; -}; - -struct psci_operations { - int (*cpu_suspend)(struct psci_power_state state, - unsigned long entry_point); - int (*cpu_off)(struct psci_power_state state); - int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); - int (*migrate)(unsigned long cpuid); - int (*affinity_info)(unsigned long target_affinity, - unsigned long lowest_affinity_level); - int (*migrate_info_type)(void); -}; - -extern struct psci_operations psci_ops; extern struct smp_operations psci_smp_ops; #ifdef CONFIG_ARM_PSCI -int psci_init(void); bool psci_smp_available(void); #else -static inline int psci_init(void) { return 0; } static inline bool psci_smp_available(void) { return false; } #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index e69f7a19735d..3b995f5c524d 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -89,7 +89,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o ifeq ($(CONFIG_ARM_PSCI),y) -obj-y += psci.o psci-call.o +obj-y += psci-call.o obj-$(CONFIG_SMP) += psci_smp.o endif diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c deleted file mode 100644 index f90fdf4ce7c7..000000000000 --- a/arch/arm/kernel/psci.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#define pr_fmt(fmt) "psci: " fmt - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -struct psci_operations psci_ops; - -static int (*invoke_psci_fn)(u32, u32, u32, u32); -typedef int (*psci_initcall_t)(const struct device_node *); - -asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32); -asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32); - -enum psci_function { - PSCI_FN_CPU_SUSPEND, - PSCI_FN_CPU_ON, - PSCI_FN_CPU_OFF, - PSCI_FN_MIGRATE, - PSCI_FN_AFFINITY_INFO, - PSCI_FN_MIGRATE_INFO_TYPE, - PSCI_FN_MAX, -}; - -static u32 psci_function_id[PSCI_FN_MAX]; - -static int psci_to_linux_errno(int errno) -{ - switch (errno) { - case PSCI_RET_SUCCESS: - return 0; - case PSCI_RET_NOT_SUPPORTED: - return -EOPNOTSUPP; - case PSCI_RET_INVALID_PARAMS: - return -EINVAL; - case PSCI_RET_DENIED: - return -EPERM; - }; - - return -EINVAL; -} - -static u32 psci_power_state_pack(struct psci_power_state state) -{ - return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) - & PSCI_0_2_POWER_STATE_ID_MASK) | - ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) - & PSCI_0_2_POWER_STATE_TYPE_MASK) | - ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) - & PSCI_0_2_POWER_STATE_AFFL_MASK); -} - -static int psci_get_version(void) -{ - int err; - - err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); - return err; -} - -static int psci_cpu_suspend(struct psci_power_state state, - unsigned long entry_point) -{ - int err; - u32 fn, power_state; - - fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; - power_state = psci_power_state_pack(state); - err = invoke_psci_fn(fn, power_state, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_off(struct psci_power_state state) -{ - int err; - u32 fn, power_state; - - fn = psci_function_id[PSCI_FN_CPU_OFF]; - power_state = psci_power_state_pack(state); - err = invoke_psci_fn(fn, power_state, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_CPU_ON]; - err = invoke_psci_fn(fn, cpuid, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_migrate(unsigned long cpuid) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE]; - err = invoke_psci_fn(fn, cpuid, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_affinity_info(unsigned long target_affinity, - unsigned long lowest_affinity_level) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; - err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); - return err; -} - -static int psci_migrate_info_type(void) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; - err = invoke_psci_fn(fn, 0, 0, 0); - return err; -} - -static int get_set_conduit_method(struct device_node *np) -{ - const char *method; - - pr_info("probing for conduit method from DT.\n"); - - if (of_property_read_string(np, "method", &method)) { - pr_warn("missing \"method\" property\n"); - return -ENXIO; - } - - if (!strcmp("hvc", method)) { - invoke_psci_fn = __invoke_psci_fn_hvc; - } else if (!strcmp("smc", method)) { - invoke_psci_fn = __invoke_psci_fn_smc; - } else { - pr_warn("invalid \"method\" property: %s\n", method); - return -EINVAL; - } - return 0; -} - -static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); 
-} - -static void psci_sys_poweroff(void) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); -} - -/* - * PSCI Function IDs for v0.2+ are well defined so use - * standard values. - */ -static int psci_0_2_init(struct device_node *np) -{ - int err, ver; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - ver = psci_get_version(); - - if (ver == PSCI_RET_NOT_SUPPORTED) { - /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ - pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); - err = -EOPNOTSUPP; - goto out_put_node; - } else { - pr_info("PSCIv%d.%d detected in firmware.\n", - PSCI_VERSION_MAJOR(ver), - PSCI_VERSION_MINOR(ver)); - - if (PSCI_VERSION_MAJOR(ver) == 0 && - PSCI_VERSION_MINOR(ver) < 2) { - err = -EINVAL; - pr_err("Conflicting PSCI version detected.\n"); - goto out_put_node; - } - } - - pr_info("Using standard PSCI v0.2 function IDs\n"); - psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND; - psci_ops.cpu_suspend = psci_cpu_suspend; - - psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; - psci_ops.cpu_off = psci_cpu_off; - - psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON; - psci_ops.cpu_on = psci_cpu_on; - - psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE; - psci_ops.migrate = psci_migrate; - - psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO; - psci_ops.affinity_info = psci_affinity_info; - - psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = - PSCI_0_2_FN_MIGRATE_INFO_TYPE; - psci_ops.migrate_info_type = psci_migrate_info_type; - - arm_pm_restart = psci_sys_reset; - - pm_power_off = psci_sys_poweroff; - -out_put_node: - of_node_put(np); - return err; -} - -/* - * PSCI < v0.2 get PSCI Function IDs via DT. - */ -static int psci_0_1_init(struct device_node *np) -{ - u32 id; - int err; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - pr_info("Using PSCI v0.1 Function IDs from DT\n"); - - if (!of_property_read_u32(np, "cpu_suspend", &id)) { - psci_function_id[PSCI_FN_CPU_SUSPEND] = id; - psci_ops.cpu_suspend = psci_cpu_suspend; - } - - if (!of_property_read_u32(np, "cpu_off", &id)) { - psci_function_id[PSCI_FN_CPU_OFF] = id; - psci_ops.cpu_off = psci_cpu_off; - } - - if (!of_property_read_u32(np, "cpu_on", &id)) { - psci_function_id[PSCI_FN_CPU_ON] = id; - psci_ops.cpu_on = psci_cpu_on; - } - - if (!of_property_read_u32(np, "migrate", &id)) { - psci_function_id[PSCI_FN_MIGRATE] = id; - psci_ops.migrate = psci_migrate; - } - -out_put_node: - of_node_put(np); - return err; -} - -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", .data = psci_0_1_init}, - { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - {}, -}; - -int __init psci_init(void) -{ - struct device_node *np; - const struct of_device_id *matched_np; - psci_initcall_t init_fn; - - np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); - if (!np) - return -ENODEV; - - init_fn = (psci_initcall_t)matched_np->data; - return init_fn(np); -} diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 244aaddfbfda..61c04b02faeb 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -17,6 +17,8 @@ #include #include #include +#include + #include #include @@ -56,17 +58,29 @@ static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle) } #ifdef CONFIG_HOTPLUG_CPU +int psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return 
-EOPNOTSUPP; + + /* Trusted OS will deny CPU_OFF */ + if (psci_tos_resident_on(cpu)) + return -EPERM; + + return 0; +} + void __ref psci_cpu_die(unsigned int cpu) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - }; + u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << + PSCI_0_2_POWER_STATE_TYPE_SHIFT; - if (psci_ops.cpu_off) - psci_ops.cpu_off(ps); + if (psci_ops.cpu_off) + psci_ops.cpu_off(state); - /* We should never return */ - panic("psci: cpu %d failed to shutdown\n", cpu); + /* We should never return */ + panic("psci: cpu %d failed to shutdown\n", cpu); } int __ref psci_cpu_kill(unsigned int cpu) @@ -109,6 +123,7 @@ bool __init psci_smp_available(void) struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = psci_cpu_disable, .cpu_die = psci_cpu_die, .cpu_kill = psci_cpu_kill, #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 36c18b73c1f4..9c38bd42f04b 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -972,7 +973,7 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); arm_dt_init_cpu_maps(); - psci_init(); + psci_dt_init(); xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index 231fba0d03e5..6050a14faee6 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -28,8 +28,8 @@ #include #include #include +#include -#include #include #include #include diff --git a/arch/arm/mach-highbank/pm.c b/arch/arm/mach-highbank/pm.c index 7f2bd85eb935..400311695548 100644 --- a/arch/arm/mach-highbank/pm.c +++ b/arch/arm/mach-highbank/pm.c @@ -16,19 +16,21 @@ #include #include +#include #include #include -#include + +#include + +#define HIGHBANK_SUSPEND_PARAM \ + ((0 << PSCI_0_2_POWER_STATE_ID_SHIFT) | \ + (1 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) | \ + (PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT)) static int highbank_suspend_finish(unsigned long val) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - .affinity_level = 1, - }; - - return psci_ops.cpu_suspend(ps, __pa(cpu_resume)); + return psci_ops.cpu_suspend(HIGHBANK_SUSPEND_PARAM, __pa(cpu_resume)); } static int highbank_pm_enter(suspend_state_t state) diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index c13feec89ea1..ea9728fde9b3 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -25,16 +25,21 @@ #include #include #include +#include + #include #include -#include + +#include + +#define CALXEDA_IDLE_PARAM \ + ((0 << PSCI_0_2_POWER_STATE_ID_SHIFT) | \ + (0 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) | \ + (PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT)) static int calxeda_idle_finish(unsigned long val) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - }; - return psci_ops.cpu_suspend(ps, __pa(cpu_resume)); + return psci_ops.cpu_suspend(CALXEDA_IDLE_PARAM, __pa(cpu_resume)); } static int calxeda_pwrdown_idle(struct cpuidle_device *dev, -- cgit v1.2.3 From 4c2880b31c700b03f3f115b5ca64be615783aa9c Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 31 Jul 2015 09:44:12 +0100 Subject: irqchip/gic: Ensure gic_cpu_if_up/down() programs correct GIC instance Commit 3228950621d9 ("irqchip: gic: Preserve gic V2 bypass 
From 4c2880b31c700b03f3f115b5ca64be615783aa9c Mon Sep 17 00:00:00 2001
From: Jon Hunter
Date: Fri, 31 Jul 2015 09:44:12 +0100
Subject: irqchip/gic: Ensure gic_cpu_if_up/down() programs correct GIC instance

Commit 3228950621d9 ("irqchip: gic: Preserve gic V2 bypass bits in cpu
ctrl register") added a new function, gic_cpu_if_up(), to program the
GIC CPU_CTRL register. This function assumes that there is only one GIC
instance present and hence always uses the chip data for the primary
GIC controller. Although it is not common for there to be a secondary,
some devices do support a secondary. Therefore, fix this by passing
gic_cpu_if_up() a pointer to the appropriate chip data structure.

Similarly, the function gic_cpu_if_down() assumes that there is only a
single GIC instance present. Update this function so that an instance
number is passed for the appropriate GIC and return an error code on
failure. The vexpress TC2 (which has a single GIC) is currently the
only user of this function and so update it accordingly. Note that
because the TC2 only has a single GIC, the call to gic_cpu_if_down()
should always be successful.

Signed-off-by: Jon Hunter
Reviewed-by: Marc Zyngier
Cc:
Cc: Russell King
Cc: Nicolas Pitre
Cc: Jason Cooper
Link: http://lkml.kernel.org/r/1438332252-25248-2-git-send-email-jonathanh@nvidia.com
Signed-off-by: Thomas Gleixner
---
 arch/arm/mach-vexpress/tc2_pm.c |  2 +-
 drivers/irqchip/irq-gic.c       | 18 ++++++++++++------
 include/linux/irqchip/arm-gic.h |  2 +-
 3 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
index b3328cd46c33..1aa4ccece69f 100644
--- a/arch/arm/mach-vexpress/tc2_pm.c
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -80,7 +80,7 @@ static void tc2_pm_cpu_powerdown_prepare(unsigned int cpu, unsigned int cluster)
 	 * to the CPU by disabling the GIC CPU IF to prevent wfi
 	 * from completing execution behind power controller back
 	 */
-	gic_cpu_if_down();
+	gic_cpu_if_down(0);
 }
 static void tc2_pm_cluster_powerdown_prepare(unsigned int cluster)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 84fc622d0309..aa3e7b8a69c4 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -356,9 +356,9 @@ static u8 gic_get_cpumask(struct gic_chip_data *gic)
 	return mask;
 }
-static void gic_cpu_if_up(void)
+static void gic_cpu_if_up(struct gic_chip_data *gic)
 {
-	void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
+	void __iomem *cpu_base = gic_data_cpu_base(gic);
 	u32 bypass = 0;
 	/*
@@ -426,17 +426,23 @@ static void gic_cpu_init(struct gic_chip_data *gic)
 	gic_cpu_config(dist_base, NULL);
 	writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK);
-	gic_cpu_if_up();
+	gic_cpu_if_up(gic);
 }
-void gic_cpu_if_down(void)
+int gic_cpu_if_down(unsigned int gic_nr)
 {
-	void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
+	void __iomem *cpu_base;
 	u32 val = 0;
+	if (gic_nr >= MAX_GIC_NR)
+		return -EINVAL;
+
+	cpu_base = gic_data_cpu_base(&gic_data[gic_nr]);
 	val = readl(cpu_base + GIC_CPU_CTRL);
 	val &= ~GICC_ENABLE;
 	writel_relaxed(val, cpu_base + GIC_CPU_CTRL);
+
+	return 0;
 }
 #ifdef CONFIG_CPU_PM
@@ -572,7 +578,7 @@ static void gic_cpu_restore(unsigned int gic_nr)
 				dist_base + GIC_DIST_PRI + i * 4);
 	writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK);
-	gic_cpu_if_up();
+	gic_cpu_if_up(&gic_data[gic_nr]);
 }
 static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 61a2007eb49a..65da435d01c1 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -98,7 +98,7 @@ struct device_node;
 void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *,
		    u32 offset, struct device_node *);
 void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
-void gic_cpu_if_down(void);
+int gic_cpu_if_down(unsigned int gic_nr);
 static inline void gic_init(unsigned int nr, int start, void __iomem *dist,
			    void __iomem *cpu)
-- cgit v1.2.3
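With the new prototype, callers name the GIC instance and can check the return value. A minimal sketch of a caller, mirroring the TC2 usage above (the hook name is hypothetical):

#include <linux/irqchip/arm-gic.h>
#include <linux/printk.h>

/* Hypothetical power-down hook: instance 0 is the primary GIC, and the
 * call can now fail if the instance number is out of range.
 */
static void example_cpu_powerdown_prepare(void)
{
	if (gic_cpu_if_down(0))
		pr_warn("GIC CPU interface could not be disabled\n");
}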
From 21caf3a765b0a88f8fedf63b36e5d15683b73fe5 Mon Sep 17 00:00:00 2001
From: Lorenzo Nava
Date: Thu, 2 Jul 2015 17:28:03 +0100
Subject: ARM: 8398/1: arm DMA: Fix allocation from CMA for coherent DMA

This patch allows the use of CMA for DMA coherent memory allocation.
At the moment, if the input parameter "is_coherent" is set to true,
the allocation is not made using CMA, which I think is not the desired
behaviour. The patch covers the allocation and free of memory for
coherent DMA.

Signed-off-by: Lorenzo Nava
Reviewed-by: Catalin Marinas
Signed-off-by: Russell King
---
 arch/arm/mm/dma-mapping.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 5edf17cf043d..ad7419e69967 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -649,14 +649,18 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 	size = PAGE_ALIGN(size);
 	want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
-	if (is_coherent || nommu())
+	if (nommu())
+		addr = __alloc_simple_buffer(dev, size, gfp, &page);
+	else if (dev_get_cma_area(dev) && (gfp & __GFP_WAIT))
+		addr = __alloc_from_contiguous(dev, size, prot, &page,
+					       caller, want_vaddr);
+	else if (is_coherent)
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
 	else if (!(gfp & __GFP_WAIT))
 		addr = __alloc_from_pool(size, &page);
-	else if (!dev_get_cma_area(dev))
-		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller, want_vaddr);
 	else
-		addr = __alloc_from_contiguous(dev, size, prot, &page, caller, want_vaddr);
+		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page,
+					    caller, want_vaddr);
 	if (page)
 		*handle = pfn_to_dma(dev, page_to_pfn(page));
@@ -684,13 +688,12 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
	dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
 {
-	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
 	void *memory;
 	if (dma_alloc_from_coherent(dev, size, handle, &memory))
 		return memory;
-	return __dma_alloc(dev, size, handle, gfp, prot, true,
+	return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true,
 			   attrs, __builtin_return_address(0));
 }
@@ -754,12 +757,12 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 	size = PAGE_ALIGN(size);
-	if (is_coherent || nommu()) {
+	if (nommu()) {
 		__dma_free_buffer(page, size);
-	} else if (__free_from_pool(cpu_addr, size)) {
+	} else if (!is_coherent && __free_from_pool(cpu_addr, size)) {
 		return;
 	} else if (!dev_get_cma_area(dev)) {
-		if (want_vaddr)
+		if (want_vaddr && !is_coherent)
 			__dma_free_remap(cpu_addr, size);
 		__dma_free_buffer(page, size);
 	} else {
-- cgit v1.2.3
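For context, nothing changes on the driver side: a coherent buffer is still requested the usual way, and only the backing allocator may now be CMA. A hedged sketch (hypothetical helper, assuming a device with a per-device or global CMA area and a sleepable GFP_KERNEL allocation):

#include <linux/dma-mapping.h>

/* Hypothetical driver snippet: with the patch above, this buffer can be
 * served from CMA even when the device uses the coherent DMA ops,
 * instead of always taking the simple-buffer path.
 */
static void *example_alloc_dma_buffer(struct device *dev, size_t size,
				       dma_addr_t *dma_handle)
{
	return dma_alloc_coherent(dev, size, dma_handle, GFP_KERNEL);
}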
From 2584cf83578c26db144730ef498f4070f82ee3ea Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Mon, 10 Aug 2015 23:07:05 -0400
Subject: arch, drivers: don't include <asm/io.h> directly, use <linux/io.h> instead

Preparation for uniform definition of ioremap, ioremap_wc, ioremap_wt,
and ioremap_cache, tree-wide.

Acked-by: Christoph Hellwig
Signed-off-by: Dan Williams
---
 arch/arm/mach-shmobile/pm-rcar.c            | 2 +-
 arch/ia64/kernel/cyclone.c                  | 2 +-
 drivers/isdn/icn/icn.h                      | 2 +-
 drivers/mtd/devices/slram.c                 | 2 +-
 drivers/mtd/nand/diskonchip.c               | 2 +-
 drivers/mtd/onenand/generic.c               | 2 +-
 drivers/scsi/sun3x_esp.c                    | 2 +-
 drivers/staging/comedi/drivers/ii_pci20kc.c | 1 +
 drivers/tty/serial/8250/8250_core.c         | 2 +-
 drivers/video/fbdev/s1d13xxxfb.c            | 3 +--
 drivers/video/fbdev/stifb.c                 | 1 +
 include/linux/io-mapping.h                  | 2 +-
 include/linux/mtd/map.h                     | 2 +-
 include/video/vga.h                         | 2 +-
 14 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/mach-shmobile/pm-rcar.c b/arch/arm/mach-shmobile/pm-rcar.c
index 00022ee56f80..9d3dde00c2fe 100644
--- a/arch/arm/mach-shmobile/pm-rcar.c
+++ b/arch/arm/mach-shmobile/pm-rcar.c
@@ -12,7 +12,7 @@
 #include
 #include
 #include
-#include
+#include
 #include "pm-rcar.h"
 /* SYSC */
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index 4826ff957a3d..5fa3848ba224 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -4,7 +4,7 @@
 #include
 #include
 #include
-#include
+#include
 /* IBM Summit (EXA) Cyclone counter code*/
 #define CYCLONE_CBAR_ADDR	0xFEB00CD0
diff --git a/drivers/isdn/icn/icn.h b/drivers/isdn/icn/icn.h
index b713466997a0..f8f2e76d34bf 100644
--- a/drivers/isdn/icn/icn.h
+++ b/drivers/isdn/icn/icn.h
@@ -38,7 +38,7 @@ typedef struct icn_cdef {
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c
index 2fc4957cbe7f..a70eb83e68f1 100644
--- a/drivers/mtd/devices/slram.c
+++ b/drivers/mtd/devices/slram.c
@@ -41,7 +41,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index 7da266a53979..0802158a3f75 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -24,7 +24,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index 32a216d31141..ab7bda0bb245 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -18,7 +18,7 @@
 #include
 #include
 #include
-#include
+#include
 /*
  * Note: Driver name and platform data format have been updated!
diff --git a/drivers/scsi/sun3x_esp.c b/drivers/scsi/sun3x_esp.c
index e26e81de7c45..d50c5ed8f428 100644
--- a/drivers/scsi/sun3x_esp.c
+++ b/drivers/scsi/sun3x_esp.c
@@ -12,9 +12,9 @@
 #include
 #include
 #include
+#include
 #include
-#include
 #include
 #include
diff --git a/drivers/staging/comedi/drivers/ii_pci20kc.c b/drivers/staging/comedi/drivers/ii_pci20kc.c
index 0768bc42a5db..14ef1f67dd42 100644
--- a/drivers/staging/comedi/drivers/ii_pci20kc.c
+++ b/drivers/staging/comedi/drivers/ii_pci20kc.c
@@ -28,6 +28,7 @@
 */
 #include
+#include
 #include "../comedidev.h"
 /*
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 37fff12dd4d0..fe902ff52e58 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -38,11 +38,11 @@
 #include
 #include
 #include
+#include
 #ifdef CONFIG_SPARC
 #include
 #endif
-#include
 #include
 #include "8250.h"
diff --git a/drivers/video/fbdev/s1d13xxxfb.c b/drivers/video/fbdev/s1d13xxxfb.c
index 83433cb0dfba..96aa46dc696c 100644
--- a/drivers/video/fbdev/s1d13xxxfb.c
+++ b/drivers/video/fbdev/s1d13xxxfb.c
@@ -32,8 +32,7 @@
 #include
 #include
 #include
-
-#include
+#include
 #include