From c32ffce0f66e5d1d4856254516e24f5ef275cd00 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Fri, 21 Feb 2014 17:01:48 +0100
Subject: ARM: 7984/1: prefetch: add prefetchw invocations for barriered atomics

After a bunch of benchmarking on the interaction between dmb and pldw,
it turns out that issuing the pldw *after* the dmb instruction can
give modest performance gains (~3% atomic_add_return improvement on a
dual A15).

This patch adds prefetchw invocations to our barriered atomic
operations including cmpxchg, test_and_xxx and futexes.

Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/lib/bitops.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/arm/lib')

diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 52886b89706c..9f12ed1eea86 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -37,6 +37,11 @@ UNWIND(	.fnstart	)
 	add	r1, r1, r0, lsl #2	@ Get word offset
 	mov	r3, r2, lsl r3		@ create mask
 	smp_dmb
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+	.arch_extension	mp
+	ALT_SMP(W(pldw)	[r1])
+	ALT_UP(W(nop))
+#endif
 1:	ldrex	r2, [r1]
 	ands	r0, r2, r3		@ save old value of bit
 	\instr	r2, r2, r3		@ toggle bit
--
cgit v1.2.3
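
For illustration only (not part of the patch): a minimal user-space C sketch
of the idea, showing where the prefetch-for-write lands relative to the
barrier in a barriered test-and-set, mirroring the asm change in bitops.h.
The function name and types are made up for this example; __builtin_prefetch(p, 1)
and the C11 fences stand in for the kernel's prefetchw() and smp_dmb, and on
ARMv7 with the MP extension the compiler may emit PLDW for the prefetch.

  /* Illustrative sketch, not the kernel's implementation. */
  #include <stdatomic.h>
  #include <stdbool.h>

  static inline bool test_and_set_bit_sketch(_Atomic unsigned long *word,
                                             unsigned int bit)
  {
          unsigned long mask = 1UL << bit;

          atomic_thread_fence(memory_order_seq_cst); /* plays the role of smp_dmb */
          __builtin_prefetch((const void *)word, 1); /* write prefetch issued *after* the barrier */

          /* the LDREX/STREX retry loop is hidden inside the atomic RMW */
          unsigned long old = atomic_fetch_or_explicit(word, mask,
                                                       memory_order_relaxed);

          atomic_thread_fence(memory_order_seq_cst); /* trailing barrier, as in the asm */
          return (old & mask) != 0;
  }

Issuing the write prefetch after the barrier means the cache line can be
pulled in for exclusive ownership while the exclusive-access loop begins,
which is where the commit message reports the ~3% atomic_add_return gain.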