diff options
author | Vineet Gupta <vgupta@kernel.org> | 2020-04-13 18:07:49 -0700 |
---|---|---|
committer | Vineet Gupta <vgupta@kernel.org> | 2021-08-24 14:25:47 -0700 |
commit | b64be6836993c431e54fad239fcba0543854ee35 (patch) | |
tree | 9d5fe48754db52199426e9b77551521a0aa5e150 /arch/arc | |
parent | 7e8f8cbb43990861e5881594e14d491f81931f8d (diff) | |
download | lwn-b64be6836993c431e54fad239fcba0543854ee35.tar.gz lwn-b64be6836993c431e54fad239fcba0543854ee35.zip |
ARC: atomics: implement relaxed variants
The current ARC fetch/return atomics provide fully ordered semantics
only with 2 full barriers around the operation.
Instead implement them as relaxed variants without any barriers and
rely on generic code to generate the fully-ordered, acquire and release
varaints by adding the appropriate full barriers.
This helps elide some extra barriers in case of acquire/release/relaxed
calls.
bloat-o-meter for hsdk defconfig shows codegen improvements, although
numbers below inflated due to unrelated inlining heuristic changes
| bloat-o-meter vmlinux-643babe34fd7-non-relaxed vmlinux-45aa05cb44d7-relaxed
| add/remove: 2/5 grow/shrink: 42/1222 up/down: 4158/-14312 (-10154)
| Function old new delta
| ..
| sys_renameat 462 476 +14
| ip_mc_inc_group 424 436 +12
| do_read_cache_page 1882 1894 +12
| ..
| refcount_dec_and_mutex_lock 254 250 -4
| refcount_dec_and_lock_irqsave 258 254 -4
| refcount_dec_and_lock 254 250 -4
| ..
| tcp_v6_route_req 246 238 -8
| tcp_v4_destroy_sock 286 278 -8
| tcp_twsk_unique 352 344 -8
Link: https://lore.kernel.org/r/20180830144344.GW24142@hirez.programming.kicks-ass.net
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@kernel.org>
Diffstat (limited to 'arch/arc')
-rw-r--r-- | arch/arc/include/asm/atomic-llsc.h | 32 | ||||
-rw-r--r-- | arch/arc/include/asm/atomic64-arcv2.h | 24 |
2 files changed, 26 insertions, 30 deletions
diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h index aab4f2855457..088d348781c1 100644 --- a/arch/arc/include/asm/atomic-llsc.h +++ b/arch/arc/include/asm/atomic-llsc.h @@ -22,16 +22,10 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \ } \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ +static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ { \ unsigned int val; \ \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ @@ -42,22 +36,17 @@ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ [i] "ir" (i) \ : "cc"); \ \ - smp_mb(); \ - \ return val; \ } +#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed + #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ { \ unsigned int val, orig; \ \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: llock %[orig], [%[ctr]] \n" \ " " #asm_op " %[val], %[orig], %[i] \n" \ @@ -69,11 +58,17 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ [i] "ir" (i) \ : "cc"); \ \ - smp_mb(); \ - \ return orig; \ } +#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed + +#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed +#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -93,7 +88,6 @@ ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) #define arch_atomic_andnot arch_atomic_andnot -#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index 22ef1cbb94e2..c5a8010fdc97 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -64,12 +64,10 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ } \ #define ATOMIC64_OP_RETURN(op, op1, op2) \ -static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \ +static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \ { \ s64 val; \ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: \n" \ " llockd %0, [%1] \n" \ @@ -81,18 +79,17 @@ static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \ : "r"(&v->counter), "ir"(a) \ : "cc"); /* memory clobber comes from smp_mb() */ \ \ - smp_mb(); \ - \ return val; \ } +#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed +#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed + #define ATOMIC64_FETCH_OP(op, op1, op2) \ -static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ +static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \ { \ s64 val, orig; \ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: \n" \ " llockd %0, [%2] \n" \ @@ -104,11 +101,17 @@ static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ : "r"(&v->counter), "ir"(a) \ : "cc"); /* memory clobber comes from smp_mb() */ \ \ - smp_mb(); \ - \ return orig; \ } +#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed +#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed + +#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed +#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed +#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed +#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed + #define ATOMIC64_OPS(op, op1, op2) \ ATOMIC64_OP(op, op1, op2) \ ATOMIC64_OP_RETURN(op, op1, op2) \ @@ -128,7 +131,6 @@ ATOMIC64_OPS(or, or, or) ATOMIC64_OPS(xor, xor, xor) #define arch_atomic64_andnot arch_atomic64_andnot -#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot #undef ATOMIC64_OPS #undef ATOMIC64_FETCH_OP |